[docs]
def get_executor_config(
@@ -4087,9 +4245,9 @@
diff --git a/latest/_modules/tensorrt_llm/llmapi/mm_encoder.html b/latest/_modules/tensorrt_llm/llmapi/mm_encoder.html
index 75c2a35120..8193bf85e3 100644
--- a/latest/_modules/tensorrt_llm/llmapi/mm_encoder.html
+++ b/latest/_modules/tensorrt_llm/llmapi/mm_encoder.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -778,9 +780,9 @@
diff --git a/latest/_modules/tensorrt_llm/llmapi/mpi_session.html b/latest/_modules/tensorrt_llm/llmapi/mpi_session.html
index 1a5bbbee79..57496f23c0 100644
--- a/latest/_modules/tensorrt_llm/llmapi/mpi_session.html
+++ b/latest/_modules/tensorrt_llm/llmapi/mpi_session.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -1248,9 +1250,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/baichuan/model.html b/latest/_modules/tensorrt_llm/models/baichuan/model.html
index 1feedcf89d..17b9618c91 100644
--- a/latest/_modules/tensorrt_llm/models/baichuan/model.html
+++ b/latest/_modules/tensorrt_llm/models/baichuan/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -886,9 +888,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/bert/model.html b/latest/_modules/tensorrt_llm/models/bert/model.html
index ff8bca7065..9c5e8ca7ef 100644
--- a/latest/_modules/tensorrt_llm/models/bert/model.html
+++ b/latest/_modules/tensorrt_llm/models/bert/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -1190,9 +1192,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/bloom/model.html b/latest/_modules/tensorrt_llm/models/bloom/model.html
index bdfef77765..52a5820861 100644
--- a/latest/_modules/tensorrt_llm/models/bloom/model.html
+++ b/latest/_modules/tensorrt_llm/models/bloom/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -798,9 +800,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/chatglm/config.html b/latest/_modules/tensorrt_llm/models/chatglm/config.html
index 24e4fa539e..fa1b3c6424 100644
--- a/latest/_modules/tensorrt_llm/models/chatglm/config.html
+++ b/latest/_modules/tensorrt_llm/models/chatglm/config.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -815,9 +817,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/chatglm/model.html b/latest/_modules/tensorrt_llm/models/chatglm/model.html
index 3ad79c2e00..ac8294f98c 100644
--- a/latest/_modules/tensorrt_llm/models/chatglm/model.html
+++ b/latest/_modules/tensorrt_llm/models/chatglm/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -1014,9 +1016,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/clip/model.html b/latest/_modules/tensorrt_llm/models/clip/model.html
index 3fa05c6d7a..054e22a83e 100644
--- a/latest/_modules/tensorrt_llm/models/clip/model.html
+++ b/latest/_modules/tensorrt_llm/models/clip/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -843,9 +845,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/cogvlm/config.html b/latest/_modules/tensorrt_llm/models/cogvlm/config.html
index 964bf3dc22..75a7de91e7 100644
--- a/latest/_modules/tensorrt_llm/models/cogvlm/config.html
+++ b/latest/_modules/tensorrt_llm/models/cogvlm/config.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -674,9 +676,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/cogvlm/model.html b/latest/_modules/tensorrt_llm/models/cogvlm/model.html
index 1a095e6404..4d2557aec9 100644
--- a/latest/_modules/tensorrt_llm/models/cogvlm/model.html
+++ b/latest/_modules/tensorrt_llm/models/cogvlm/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -927,9 +929,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/commandr/model.html b/latest/_modules/tensorrt_llm/models/commandr/model.html
index c005f54ad6..cb0c864cd7 100644
--- a/latest/_modules/tensorrt_llm/models/commandr/model.html
+++ b/latest/_modules/tensorrt_llm/models/commandr/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -825,9 +827,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/dbrx/config.html b/latest/_modules/tensorrt_llm/models/dbrx/config.html
index 33668b804a..9a7e917fea 100644
--- a/latest/_modules/tensorrt_llm/models/dbrx/config.html
+++ b/latest/_modules/tensorrt_llm/models/dbrx/config.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -689,9 +691,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/dbrx/model.html b/latest/_modules/tensorrt_llm/models/dbrx/model.html
index 1f7d28a8c7..c058b28af2 100644
--- a/latest/_modules/tensorrt_llm/models/dbrx/model.html
+++ b/latest/_modules/tensorrt_llm/models/dbrx/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -815,9 +817,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/deepseek_v1/model.html b/latest/_modules/tensorrt_llm/models/deepseek_v1/model.html
index 58859057d6..4ef0de9bfe 100644
--- a/latest/_modules/tensorrt_llm/models/deepseek_v1/model.html
+++ b/latest/_modules/tensorrt_llm/models/deepseek_v1/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -909,9 +911,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/deepseek_v2/model.html b/latest/_modules/tensorrt_llm/models/deepseek_v2/model.html
index b8f43dd0ac..14b01a9e89 100644
--- a/latest/_modules/tensorrt_llm/models/deepseek_v2/model.html
+++ b/latest/_modules/tensorrt_llm/models/deepseek_v2/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -991,9 +993,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/dit/model.html b/latest/_modules/tensorrt_llm/models/dit/model.html
index 79248a4f0f..3b1edabc97 100644
--- a/latest/_modules/tensorrt_llm/models/dit/model.html
+++ b/latest/_modules/tensorrt_llm/models/dit/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -1027,9 +1029,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/eagle/model.html b/latest/_modules/tensorrt_llm/models/eagle/model.html
index 746b54b9c6..72268516ce 100644
--- a/latest/_modules/tensorrt_llm/models/eagle/model.html
+++ b/latest/_modules/tensorrt_llm/models/eagle/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -1963,9 +1965,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/enc_dec/model.html b/latest/_modules/tensorrt_llm/models/enc_dec/model.html
index 29b872204a..5670bcb067 100644
--- a/latest/_modules/tensorrt_llm/models/enc_dec/model.html
+++ b/latest/_modules/tensorrt_llm/models/enc_dec/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -2870,9 +2872,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/falcon/config.html b/latest/_modules/tensorrt_llm/models/falcon/config.html
index 1544f9b733..a517fbaa87 100644
--- a/latest/_modules/tensorrt_llm/models/falcon/config.html
+++ b/latest/_modules/tensorrt_llm/models/falcon/config.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -750,9 +752,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/falcon/model.html b/latest/_modules/tensorrt_llm/models/falcon/model.html
index 2cb24e48cd..77a7150981 100644
--- a/latest/_modules/tensorrt_llm/models/falcon/model.html
+++ b/latest/_modules/tensorrt_llm/models/falcon/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -912,9 +914,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/gemma/config.html b/latest/_modules/tensorrt_llm/models/gemma/config.html
index d887eb7302..7e7f0420b7 100644
--- a/latest/_modules/tensorrt_llm/models/gemma/config.html
+++ b/latest/_modules/tensorrt_llm/models/gemma/config.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -840,9 +842,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/gemma/model.html b/latest/_modules/tensorrt_llm/models/gemma/model.html
index 38fa4c0da5..c0bcd22677 100644
--- a/latest/_modules/tensorrt_llm/models/gemma/model.html
+++ b/latest/_modules/tensorrt_llm/models/gemma/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -1035,9 +1037,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/gpt/config.html b/latest/_modules/tensorrt_llm/models/gpt/config.html
index b4564e9fea..408afb19c0 100644
--- a/latest/_modules/tensorrt_llm/models/gpt/config.html
+++ b/latest/_modules/tensorrt_llm/models/gpt/config.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -959,9 +961,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/gpt/model.html b/latest/_modules/tensorrt_llm/models/gpt/model.html
index c7ccc4dcd4..80d5cb7e7b 100644
--- a/latest/_modules/tensorrt_llm/models/gpt/model.html
+++ b/latest/_modules/tensorrt_llm/models/gpt/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -1062,9 +1064,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/gptj/config.html b/latest/_modules/tensorrt_llm/models/gptj/config.html
index 3f391136e7..af1260074d 100644
--- a/latest/_modules/tensorrt_llm/models/gptj/config.html
+++ b/latest/_modules/tensorrt_llm/models/gptj/config.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -688,9 +690,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/gptj/model.html b/latest/_modules/tensorrt_llm/models/gptj/model.html
index 1c37471537..23c9da2bfb 100644
--- a/latest/_modules/tensorrt_llm/models/gptj/model.html
+++ b/latest/_modules/tensorrt_llm/models/gptj/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -838,9 +840,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/gptneox/model.html b/latest/_modules/tensorrt_llm/models/gptneox/model.html
index 7f514b91c3..f10a22212d 100644
--- a/latest/_modules/tensorrt_llm/models/gptneox/model.html
+++ b/latest/_modules/tensorrt_llm/models/gptneox/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -780,9 +782,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/llama/config.html b/latest/_modules/tensorrt_llm/models/llama/config.html
index fdf8b75fd5..35794b9a22 100644
--- a/latest/_modules/tensorrt_llm/models/llama/config.html
+++ b/latest/_modules/tensorrt_llm/models/llama/config.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -914,9 +916,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/llama/model.html b/latest/_modules/tensorrt_llm/models/llama/model.html
index d09be90d7a..4960edfb55 100644
--- a/latest/_modules/tensorrt_llm/models/llama/model.html
+++ b/latest/_modules/tensorrt_llm/models/llama/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -1262,9 +1264,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/mamba/model.html b/latest/_modules/tensorrt_llm/models/mamba/model.html
index d11931dd33..2e19b21a09 100644
--- a/latest/_modules/tensorrt_llm/models/mamba/model.html
+++ b/latest/_modules/tensorrt_llm/models/mamba/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -1107,9 +1109,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/medusa/config.html b/latest/_modules/tensorrt_llm/models/medusa/config.html
index 5fcfb61ebc..8169b9e474 100644
--- a/latest/_modules/tensorrt_llm/models/medusa/config.html
+++ b/latest/_modules/tensorrt_llm/models/medusa/config.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -747,9 +749,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/medusa/model.html b/latest/_modules/tensorrt_llm/models/medusa/model.html
index e903cea5f6..160c44d25d 100644
--- a/latest/_modules/tensorrt_llm/models/medusa/model.html
+++ b/latest/_modules/tensorrt_llm/models/medusa/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -897,9 +899,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/mllama/model.html b/latest/_modules/tensorrt_llm/models/mllama/model.html
index 6be138b113..02d061302a 100644
--- a/latest/_modules/tensorrt_llm/models/mllama/model.html
+++ b/latest/_modules/tensorrt_llm/models/mllama/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -2208,9 +2210,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/mmdit_sd3/model.html b/latest/_modules/tensorrt_llm/models/mmdit_sd3/model.html
index b9d46b13f0..6ec32c5df2 100644
--- a/latest/_modules/tensorrt_llm/models/mmdit_sd3/model.html
+++ b/latest/_modules/tensorrt_llm/models/mmdit_sd3/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -1274,9 +1276,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/modeling_utils.html b/latest/_modules/tensorrt_llm/models/modeling_utils.html
index bd2432a93e..3812b60a07 100644
--- a/latest/_modules/tensorrt_llm/models/modeling_utils.html
+++ b/latest/_modules/tensorrt_llm/models/modeling_utils.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -2683,9 +2685,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/mpt/model.html b/latest/_modules/tensorrt_llm/models/mpt/model.html
index 8813fa46cb..459ef5830e 100644
--- a/latest/_modules/tensorrt_llm/models/mpt/model.html
+++ b/latest/_modules/tensorrt_llm/models/mpt/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -812,9 +814,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/multimodal_encoders/config.html b/latest/_modules/tensorrt_llm/models/multimodal_encoders/config.html
index ffda4025f2..155f15b151 100644
--- a/latest/_modules/tensorrt_llm/models/multimodal_encoders/config.html
+++ b/latest/_modules/tensorrt_llm/models/multimodal_encoders/config.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -746,9 +748,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/multimodal_encoders/model.html b/latest/_modules/tensorrt_llm/models/multimodal_encoders/model.html
index 1c19643337..abb527e04b 100644
--- a/latest/_modules/tensorrt_llm/models/multimodal_encoders/model.html
+++ b/latest/_modules/tensorrt_llm/models/multimodal_encoders/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -814,9 +816,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/opt/model.html b/latest/_modules/tensorrt_llm/models/opt/model.html
index e10798ca21..4e3f9e15a7 100644
--- a/latest/_modules/tensorrt_llm/models/opt/model.html
+++ b/latest/_modules/tensorrt_llm/models/opt/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -817,9 +819,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/phi/model.html b/latest/_modules/tensorrt_llm/models/phi/model.html
index deecc0188d..3ca7749044 100644
--- a/latest/_modules/tensorrt_llm/models/phi/model.html
+++ b/latest/_modules/tensorrt_llm/models/phi/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -859,9 +861,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/phi3/model.html b/latest/_modules/tensorrt_llm/models/phi3/model.html
index ce940e2d70..05e965e296 100644
--- a/latest/_modules/tensorrt_llm/models/phi3/model.html
+++ b/latest/_modules/tensorrt_llm/models/phi3/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -955,9 +957,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/recurrentgemma/model.html b/latest/_modules/tensorrt_llm/models/recurrentgemma/model.html
index cbe0030d9c..b71d6ca879 100644
--- a/latest/_modules/tensorrt_llm/models/recurrentgemma/model.html
+++ b/latest/_modules/tensorrt_llm/models/recurrentgemma/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -1260,9 +1262,9 @@
diff --git a/latest/_modules/tensorrt_llm/models/redrafter/model.html b/latest/_modules/tensorrt_llm/models/redrafter/model.html
index 6abe0d27cd..05ecd0e886 100644
--- a/latest/_modules/tensorrt_llm/models/redrafter/model.html
+++ b/latest/_modules/tensorrt_llm/models/redrafter/model.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -947,9 +949,9 @@
diff --git a/latest/_modules/tensorrt_llm/plugin/plugin.html b/latest/_modules/tensorrt_llm/plugin/plugin.html
index 25b371b9d8..6c5c97516b 100644
--- a/latest/_modules/tensorrt_llm/plugin/plugin.html
+++ b/latest/_modules/tensorrt_llm/plugin/plugin.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -1435,9 +1437,9 @@
diff --git a/latest/_modules/tensorrt_llm/quantization/mode.html b/latest/_modules/tensorrt_llm/quantization/mode.html
index 35fc7812a6..8937e5cda1 100644
--- a/latest/_modules/tensorrt_llm/quantization/mode.html
+++ b/latest/_modules/tensorrt_llm/quantization/mode.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -1109,9 +1111,9 @@
diff --git a/latest/_modules/tensorrt_llm/quantization/quantize_by_modelopt.html b/latest/_modules/tensorrt_llm/quantization/quantize_by_modelopt.html
index cce8bf592a..63d236e56a 100644
--- a/latest/_modules/tensorrt_llm/quantization/quantize_by_modelopt.html
+++ b/latest/_modules/tensorrt_llm/quantization/quantize_by_modelopt.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -1903,9 +1905,9 @@
diff --git a/latest/_modules/tensorrt_llm/runtime/enc_dec_model_runner.html b/latest/_modules/tensorrt_llm/runtime/enc_dec_model_runner.html
index 949b58db8e..81d68fc822 100644
--- a/latest/_modules/tensorrt_llm/runtime/enc_dec_model_runner.html
+++ b/latest/_modules/tensorrt_llm/runtime/enc_dec_model_runner.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -1174,9 +1176,9 @@
diff --git a/latest/_modules/tensorrt_llm/runtime/generation.html b/latest/_modules/tensorrt_llm/runtime/generation.html
index 3d57374b31..d2e389435a 100644
--- a/latest/_modules/tensorrt_llm/runtime/generation.html
+++ b/latest/_modules/tensorrt_llm/runtime/generation.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -5514,9 +5516,9 @@
diff --git a/latest/_modules/tensorrt_llm/runtime/kv_cache_manager.html b/latest/_modules/tensorrt_llm/runtime/kv_cache_manager.html
index 93d0048c58..3f91525255 100644
--- a/latest/_modules/tensorrt_llm/runtime/kv_cache_manager.html
+++ b/latest/_modules/tensorrt_llm/runtime/kv_cache_manager.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -1118,9 +1120,9 @@
diff --git a/latest/_modules/tensorrt_llm/runtime/model_runner.html b/latest/_modules/tensorrt_llm/runtime/model_runner.html
index 77830c02ea..23889296f5 100644
--- a/latest/_modules/tensorrt_llm/runtime/model_runner.html
+++ b/latest/_modules/tensorrt_llm/runtime/model_runner.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -991,6 +993,7 @@
prompt_table,
torch.Tensor), "Prompt table should be str or torch.Tensor"
prompt_table_data = prompt_table.to(dtype=self.dtype)
+torch.cuda.current_stream().synchronize()
return prompt_table_data
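For context, the hunk above adds a stream synchronization right after the dtype cast of the prompt table. A minimal sketch of that pattern is shown below (a hypothetical standalone helper, not the runner's actual method; it requires a CUDA device):

```python
import torch

def prepare_prompt_table(prompt_table: torch.Tensor, dtype: torch.dtype) -> torch.Tensor:
    # Mirrors the assertion in the hunk above: upstream only accepts str or torch.Tensor.
    assert isinstance(prompt_table, torch.Tensor), "Prompt table should be str or torch.Tensor"
    prompt_table_data = prompt_table.to(dtype=dtype)  # .to() can run asynchronously on GPU tensors
    # Block the current CUDA stream so the converted table is ready before it is consumed.
    torch.cuda.current_stream().synchronize()
    return prompt_table_data
```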
@@ -1637,9 +1640,9 @@
diff --git a/latest/_modules/tensorrt_llm/runtime/model_runner_cpp.html b/latest/_modules/tensorrt_llm/runtime/model_runner_cpp.html
index 074e6b3161..ac227fda67 100644
--- a/latest/_modules/tensorrt_llm/runtime/model_runner_cpp.html
+++ b/latest/_modules/tensorrt_llm/runtime/model_runner_cpp.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -1850,9 +1852,9 @@
diff --git a/latest/_modules/tensorrt_llm/runtime/multimodal_model_runner.html b/latest/_modules/tensorrt_llm/runtime/multimodal_model_runner.html
index 37ef76d322..e53e09fcc9 100644
--- a/latest/_modules/tensorrt_llm/runtime/multimodal_model_runner.html
+++ b/latest/_modules/tensorrt_llm/runtime/multimodal_model_runner.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -3432,9 +3434,9 @@
diff --git a/latest/_modules/tensorrt_llm/runtime/session.html b/latest/_modules/tensorrt_llm/runtime/session.html
index aa650ba6b9..da040db4d2 100644
--- a/latest/_modules/tensorrt_llm/runtime/session.html
+++ b/latest/_modules/tensorrt_llm/runtime/session.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -978,9 +980,9 @@
diff --git a/latest/_modules/tensorrt_llm/sampling_params.html b/latest/_modules/tensorrt_llm/sampling_params.html
index 12e5d655ff..75447e5237 100644
--- a/latest/_modules/tensorrt_llm/sampling_params.html
+++ b/latest/_modules/tensorrt_llm/sampling_params.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -353,6 +353,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+
Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -361,6 +362,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+
OpenAI Responses Client
Dynamo K8s Example
@@ -862,9 +864,13 @@
[docs]
@staticmethod
def params_imply_greedy_decoding(
-    *, temperature: Optional[float], top_p: Optional[float], top_k: Optional[int]
+    *,
+    temperature: Optional[float],
+    top_p: Optional[float],
+    top_k: Optional[int],
+    use_beam_search: bool | None,
):
-    return (
+    return (not use_beam_search) and (
        (temperature is None and top_p is None and top_k is None)
        or top_k == 1
        or top_p == 0.0
@@ -874,10 +880,11 @@
@property
def _greedy_decoding(self) -> bool:
-    return not self.use_beam_search and self.params_imply_greedy_decoding(
+    return self.params_imply_greedy_decoding(
        temperature=self.temperature,
        top_p=self.top_p,
        top_k=self.top_k,
+        use_beam_search=self.use_beam_search,
    )
@property
@property
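The hunk above threads `use_beam_search` into the static check so beam search is never classified as greedy decoding. A standalone sketch of the resulting predicate (logic copied from the diff, not the library module itself; `bool | None` needs Python 3.10+):

```python
from typing import Optional

def params_imply_greedy_decoding(
    *,
    temperature: Optional[float],
    top_p: Optional[float],
    top_k: Optional[int],
    use_beam_search: bool | None,
) -> bool:
    # Beam search rules out greedy decoding outright; otherwise unset sampling
    # parameters, top_k == 1, or top_p == 0.0 all collapse to greedy decoding.
    return (not use_beam_search) and (
        (temperature is None and top_p is None and top_k is None)
        or top_k == 1
        or top_p == 0.0
    )

# Unset sampling params without beam search imply greedy decoding...
assert params_imply_greedy_decoding(
    temperature=None, top_p=None, top_k=None, use_beam_search=False
)
# ...while enabling beam search does not.
assert not params_imply_greedy_decoding(
    temperature=None, top_p=None, top_k=None, use_beam_search=True
)
```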
@@ -1192,9 +1199,9 @@
diff --git a/latest/_sources/blogs/Best_perf_practice_on_DeepSeek-R1_in_TensorRT-LLM.md.txt b/latest/_sources/blogs/Best_perf_practice_on_DeepSeek-R1_in_TensorRT-LLM.md.txt
index da72ee5464..ad0e9975a1 100644
--- a/latest/_sources/blogs/Best_perf_practice_on_DeepSeek-R1_in_TensorRT-LLM.md.txt
+++ b/latest/_sources/blogs/Best_perf_practice_on_DeepSeek-R1_in_TensorRT-LLM.md.txt
@@ -30,7 +30,7 @@ In this blog, we share the configurations and procedures about how to reproduce
- [Expected Result Format](#expected-result-format-3)
- [Exploring more ISL/OSL combinations](#exploring-more-islosl-combinations)
- [WIP: Enable more features by default](#wip-enable-more-features-by-default)
- - [Not supported: MLA chunked context support on Hopper](#not-supported-mla-chunked-context-support-on-hopper)
+ - [MLA chunked context](#mla-chunked-context)
- [Out of memory issues](#out-of-memory-issues)
@@ -69,8 +69,11 @@ For NVIDIA Hopper GPUs, it's recommended to use the FP8 version of the DeepSeek
YOUR_MODEL_PATH=
cd $YOUR_MODEL_PATH
-## Download FP4 model for Blackwell GPUs
-git clone https://huggingface.co/nvidia/DeepSeek-R1-FP4
+## Download NVFP4 model for Blackwell GPUs
+git clone https://huggingface.co/nvidia/DeepSeek-R1-NVFP4-v2
+
+## Or the 0528 version
+git clone https://huggingface.co/nvidia/DeepSeek-R1-0528-NVFP4-v2
## Download FP8 model for Hopper GPUs
## FP8 model also works for Blackwell, but FP4 has the best performance on Blackwell.
@@ -248,13 +251,13 @@ To do the benchmark, run the following command:
```bash
# generate synthetic dataset
-python ${YOUR_WORK_PATH}/benchmarks/cpp/prepare_dataset.py \
- --stdout \
- --tokenizer nvidia/DeepSeek-R1-FP4 \
+trtllm-bench --model nvidia/DeepSeek-R1-FP4 \
+ prepare-dataset \
+ --output dataset.txt \
token-norm-dist \
--input-mean 1024 --output-mean 2048 \
--input-stdev 0 --output-stdev 0 \
- --num-requests 49152 > dataset.txt
+ --num-requests 49152
YOUR_DATA_PATH=./dataset.txt
@@ -350,13 +353,14 @@ To do the benchmark, run the following command:
```bash
# generate synthetic dataset
-python ${YOUR_WORK_PATH}/benchmarks/cpp/prepare_dataset.py \
- --stdout \
- --tokenizer deepseek-ai/DeepSeek-R1 \
+trtllm-bench --model nvidia/DeepSeek-R1-FP4 \
+ prepare-dataset \
+ --output dataset.txt \
token-norm-dist \
--input-mean 1024 --output-mean 2048 \
--input-stdev 0 --output-stdev 0 \
- --num-requests 5120 > dataset.txt
+ --num-requests 5120
+
YOUR_DATA_PATH=./dataset.txt
cat >./extra-llm-api-config.yml<`_ for examples in the following sections.
+After you start the server, you can send inference requests through the Completions API, Chat API, and Responses API, which are compatible with the corresponding OpenAI APIs. We use `TinyLlama-1.1B-Chat-v1.0 `_ for the examples in the following sections.
Chat API
~~~~~~~~
@@ -66,6 +66,24 @@ Another example uses ``curl``:
:language: bash
:linenos:
+Responses API
+~~~~~~~~~~~~~~~
+
+You can query the Responses API with any HTTP client; a typical example is the OpenAI Python client:
+
+.. literalinclude:: ../../../../examples/serve/openai_responses_client.py
+ :language: python
+ :linenos:
+
+Another example uses ``curl``:
+
+.. literalinclude:: ../../../../examples/serve/curl_responses_client.sh
+ :language: bash
+ :linenos:
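For orientation, a minimal sketch of such a client is shown below. It assumes a server listening on ``localhost:8000``, a placeholder model name, and a recent ``openai`` SDK that exposes the Responses API; the shipped ``openai_responses_client.py`` example referenced above is the authoritative version.

```python
from openai import OpenAI

# Point the OpenAI client at the local trtllm-serve endpoint; the API key is unused.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-needed")

response = client.responses.create(
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # placeholder; use the served model name
    input="Where is New York?",
)
print(response.output_text)
```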
+
+
+More OpenAI-compatible examples can be found in the `compatibility examples `_ directory.
+
Multimodal Serving
~~~~~~~~~~~~~~~~~~
diff --git a/latest/_sources/deployment-guide/config_table.rst.txt b/latest/_sources/deployment-guide/config_table.rst.txt
new file mode 100644
index 0000000000..d28fed25a8
--- /dev/null
+++ b/latest/_sources/deployment-guide/config_table.rst.txt
@@ -0,0 +1,1074 @@
+.. include:: note_sections.rst
+ :start-after: .. start-note-traffic-patterns
+ :end-before: .. end-note-traffic-patterns
+
+.. start-deepseek-ai/DeepSeek-R1-0528
+
+.. _deepseek-ai/DeepSeek-R1-0528:
+
+`DeepSeek-R1 `_
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. list-table::
+ :width: 100%
+ :header-rows: 1
+ :widths: 12 15 15 13 20 25
+
+ * - GPU
+ - Performance Profile
+ - ISL / OSL
+ - Concurrency
+ - Config
+ - Command
+ * - 8xB200_NVL
+ - Min Latency
+ - 1024 / 1024
+ - 4
+ - `1k1k_tp8_conc4.yaml `_
+ - ``trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc4.yaml``
+ * - 8xB200_NVL
+ - Low Latency
+ - 1024 / 1024
+ - 8
+ - `1k1k_tp8_conc8.yaml `_
+ - ``trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc8.yaml``
+ * - 8xB200_NVL
+ - Balanced
+ - 1024 / 1024
+ - 16
+ - `1k1k_tp8_conc16.yaml `_
+ - ``trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc16.yaml``
+ * - 8xB200_NVL
+ - High Throughput
+ - 1024 / 1024
+ - 32
+ - `1k1k_tp8_conc32.yaml `_
+ - ``trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc32.yaml``
+ * - 8xB200_NVL
+ - Max Throughput
+ - 1024 / 1024
+ - 64
+ - `1k1k_tp8_conc64.yaml `_
+ - ``trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc64.yaml``
+ * - 8xB200_NVL
+ - Min Latency
+ - 8192 / 1024
+ - 4
+ - `8k1k_tp8_conc4.yaml `_
+ - ``trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc4.yaml``
+ * - 8xB200_NVL
+ - Low Latency
+ - 8192 / 1024
+ - 8
+ - `8k1k_tp8_conc8.yaml `_
+ - ``trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc8.yaml``
+ * - 8xB200_NVL
+ - Balanced
+ - 8192 / 1024
+ - 16
+ - `8k1k_tp8_conc16.yaml `_
+ - ``trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc16.yaml``
+ * - 8xB200_NVL
+ - High Throughput
+ - 8192 / 1024
+ - 32
+ - `8k1k_tp8_conc32.yaml `_
+ - ``trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc32.yaml``
+ * - 8xB200_NVL
+ - Max Throughput
+ - 8192 / 1024
+ - 64
+ - `8k1k_tp8_conc64.yaml `_
+ - ``trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc64.yaml``
+ * - 8xH200_SXM
+ - Min Latency
+ - 1024 / 1024
+ - 4
+ - `1k1k_tp8_conc4.yaml `_
+ - ``trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc4.yaml``
+ * - 8xH200_SXM
+ - Low Latency
+ - 1024 / 1024
+ - 8
+ - `1k1k_tp8_conc8.yaml `_
+ - ``trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc8.yaml``
+ * - 8xH200_SXM
+ - Balanced
+ - 1024 / 1024
+ - 16
+ - `1k1k_tp8_conc16.yaml `_
+ - ``trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc16.yaml``
+ * - 8xH200_SXM
+ - High Throughput
+ - 1024 / 1024
+ - 32
+ - `1k1k_tp8_conc32.yaml `_
+ - ``trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc32.yaml``
+ * - 8xH200_SXM
+ - Max Throughput
+ - 1024 / 1024
+ - 64
+ - `1k1k_tp8_conc64.yaml `_
+ - ``trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc64.yaml``
+ * - 8xH200_SXM
+ - Min Latency
+ - 8192 / 1024
+ - 4
+ - `8k1k_tp8_conc4.yaml `_
+ - ``trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc4.yaml``
+ * - 8xH200_SXM
+ - Low Latency
+ - 8192 / 1024
+ - 8
+ - `8k1k_tp8_conc8.yaml `_
+ - ``trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc8.yaml``
+ * - 8xH200_SXM
+ - Balanced
+ - 8192 / 1024
+ - 16
+ - `8k1k_tp8_conc16.yaml `_
+ - ``trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc16.yaml``
+ * - 8xH200_SXM
+ - High Throughput
+ - 8192 / 1024
+ - 32
+ - `8k1k_tp8_conc32.yaml `_
+ - ``trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc32.yaml``
+ * - 8xH200_SXM
+ - Max Throughput
+ - 8192 / 1024
+ - 64
+ - `8k1k_tp8_conc64.yaml `_
+ - ``trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc64.yaml``
+
+.. end-deepseek-ai/DeepSeek-R1-0528
+
+.. start-nvidia/DeepSeek-R1-0528-FP4-v2
+
+.. _nvidia/DeepSeek-R1-0528-FP4-v2:
+
+`DeepSeek-R1 (NVFP4) `_
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. list-table::
+ :width: 100%
+ :header-rows: 1
+ :widths: 12 15 15 13 20 25
+
+ * - GPU
+ - Performance Profile
+ - ISL / OSL
+ - Concurrency
+ - Config
+ - Command
+ * - 4xB200_NVL
+ - Min Latency
+ - 1024 / 1024
+ - 4
+ - `1k1k_tp4_conc4.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc4.yaml``
+ * - 8xB200_NVL
+ - Low Latency
+ - 1024 / 1024
+ - 4
+ - `1k1k_tp8_conc4.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc4.yaml``
+ * - 4xB200_NVL
+ - Low Latency
+ - 1024 / 1024
+ - 8
+ - `1k1k_tp4_conc8.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc8.yaml``
+ * - 8xB200_NVL
+ - Low Latency
+ - 1024 / 1024
+ - 8
+ - `1k1k_tp8_conc8.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc8.yaml``
+ * - 4xB200_NVL
+ - Low Latency
+ - 1024 / 1024
+ - 16
+ - `1k1k_tp4_conc16.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc16.yaml``
+ * - 8xB200_NVL
+ - Low Latency
+ - 1024 / 1024
+ - 16
+ - `1k1k_tp8_conc16.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc16.yaml``
+ * - 4xB200_NVL
+ - Low Latency
+ - 1024 / 1024
+ - 32
+ - `1k1k_tp4_conc32.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc32.yaml``
+ * - 8xB200_NVL
+ - High Throughput
+ - 1024 / 1024
+ - 32
+ - `1k1k_tp8_conc32.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc32.yaml``
+ * - 4xB200_NVL
+ - High Throughput
+ - 1024 / 1024
+ - 64
+ - `1k1k_tp4_conc64.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc64.yaml``
+ * - 8xB200_NVL
+ - High Throughput
+ - 1024 / 1024
+ - 64
+ - `1k1k_tp8_conc64.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc64.yaml``
+ * - 4xB200_NVL
+ - High Throughput
+ - 1024 / 1024
+ - 128
+ - `1k1k_tp4_conc128.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc128.yaml``
+ * - 8xB200_NVL
+ - High Throughput
+ - 1024 / 1024
+ - 128
+ - `1k1k_tp8_conc128.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc128.yaml``
+ * - 4xB200_NVL
+ - High Throughput
+ - 1024 / 1024
+ - 256
+ - `1k1k_tp4_conc256.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc256.yaml``
+ * - 8xB200_NVL
+ - Max Throughput
+ - 1024 / 1024
+ - 256
+ - `1k1k_tp8_conc256.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc256.yaml``
+ * - 4xB200_NVL
+ - Min Latency
+ - 8192 / 1024
+ - 4
+ - `8k1k_tp4_conc4.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc4.yaml``
+ * - 8xB200_NVL
+ - Low Latency
+ - 8192 / 1024
+ - 4
+ - `8k1k_tp8_conc4.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc4.yaml``
+ * - 4xB200_NVL
+ - Low Latency
+ - 8192 / 1024
+ - 8
+ - `8k1k_tp4_conc8.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc8.yaml``
+ * - 8xB200_NVL
+ - Low Latency
+ - 8192 / 1024
+ - 8
+ - `8k1k_tp8_conc8.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc8.yaml``
+ * - 4xB200_NVL
+ - Low Latency
+ - 8192 / 1024
+ - 16
+ - `8k1k_tp4_conc16.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc16.yaml``
+ * - 8xB200_NVL
+ - Low Latency
+ - 8192 / 1024
+ - 16
+ - `8k1k_tp8_conc16.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc16.yaml``
+ * - 4xB200_NVL
+ - Low Latency
+ - 8192 / 1024
+ - 32
+ - `8k1k_tp4_conc32.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc32.yaml``
+ * - 8xB200_NVL
+ - High Throughput
+ - 8192 / 1024
+ - 32
+ - `8k1k_tp8_conc32.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc32.yaml``
+ * - 4xB200_NVL
+ - High Throughput
+ - 8192 / 1024
+ - 64
+ - `8k1k_tp4_conc64.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc64.yaml``
+ * - 8xB200_NVL
+ - High Throughput
+ - 8192 / 1024
+ - 64
+ - `8k1k_tp8_conc64.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc64.yaml``
+ * - 4xB200_NVL
+ - High Throughput
+ - 8192 / 1024
+ - 128
+ - `8k1k_tp4_conc128.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc128.yaml``
+ * - 8xB200_NVL
+ - High Throughput
+ - 8192 / 1024
+ - 128
+ - `8k1k_tp8_conc128.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc128.yaml``
+ * - 4xB200_NVL
+ - High Throughput
+ - 8192 / 1024
+ - 256
+ - `8k1k_tp4_conc256.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc256.yaml``
+ * - 8xB200_NVL
+ - Max Throughput
+ - 8192 / 1024
+ - 256
+ - `8k1k_tp8_conc256.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc256.yaml``
+
+.. end-nvidia/DeepSeek-R1-0528-FP4-v2
+
+.. start-openai/gpt-oss-120b
+
+.. _openai/gpt-oss-120b:
+
+`gpt-oss-120b `_
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. list-table::
+ :width: 100%
+ :header-rows: 1
+ :widths: 12 15 15 13 20 25
+
+ * - GPU
+ - Performance Profile
+ - ISL / OSL
+ - Concurrency
+ - Config
+ - Command
+ * - B200_NVL
+ - Min Latency
+ - 1024 / 1024
+ - 4
+ - `1k1k_tp1_conc4.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc4.yaml``
+ * - 2xB200_NVL
+ - Low Latency
+ - 1024 / 1024
+ - 4
+ - `1k1k_tp2_conc4.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc4.yaml``
+ * - 4xB200_NVL
+ - Low Latency
+ - 1024 / 1024
+ - 4
+ - `1k1k_tp4_conc4.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc4.yaml``
+ * - 8xB200_NVL
+ - Low Latency
+ - 1024 / 1024
+ - 4
+ - `1k1k_tp8_conc4.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc4.yaml``
+ * - B200_NVL
+ - Low Latency
+ - 1024 / 1024
+ - 8
+ - `1k1k_tp1_conc8.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc8.yaml``
+ * - 2xB200_NVL
+ - Low Latency
+ - 1024 / 1024
+ - 8
+ - `1k1k_tp2_conc8.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc8.yaml``
+ * - 4xB200_NVL
+ - Low Latency
+ - 1024 / 1024
+ - 8
+ - `1k1k_tp4_conc8.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc8.yaml``
+ * - 8xB200_NVL
+ - Low Latency
+ - 1024 / 1024
+ - 8
+ - `1k1k_tp8_conc8.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc8.yaml``
+ * - B200_NVL
+ - Low Latency
+ - 1024 / 1024
+ - 16
+ - `1k1k_tp1_conc16.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc16.yaml``
+ * - 2xB200_NVL
+ - Low Latency
+ - 1024 / 1024
+ - 16
+ - `1k1k_tp2_conc16.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc16.yaml``
+ * - 4xB200_NVL
+ - High Throughput
+ - 1024 / 1024
+ - 16
+ - `1k1k_tp4_conc16.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc16.yaml``
+ * - 8xB200_NVL
+ - High Throughput
+ - 1024 / 1024
+ - 16
+ - `1k1k_tp8_conc16.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc16.yaml``
+ * - B200_NVL
+ - High Throughput
+ - 1024 / 1024
+ - 32
+ - `1k1k_tp1_conc32.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc32.yaml``
+ * - 2xB200_NVL
+ - High Throughput
+ - 1024 / 1024
+ - 32
+ - `1k1k_tp2_conc32.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc32.yaml``
+ * - 4xB200_NVL
+ - High Throughput
+ - 1024 / 1024
+ - 32
+ - `1k1k_tp4_conc32.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc32.yaml``
+ * - 8xB200_NVL
+ - High Throughput
+ - 1024 / 1024
+ - 32
+ - `1k1k_tp8_conc32.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc32.yaml``
+ * - B200_NVL
+ - High Throughput
+ - 1024 / 1024
+ - 64
+ - `1k1k_tp1_conc64.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc64.yaml``
+ * - 2xB200_NVL
+ - High Throughput
+ - 1024 / 1024
+ - 64
+ - `1k1k_tp2_conc64.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc64.yaml``
+ * - 4xB200_NVL
+ - High Throughput
+ - 1024 / 1024
+ - 64
+ - `1k1k_tp4_conc64.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc64.yaml``
+ * - 8xB200_NVL
+ - Max Throughput
+ - 1024 / 1024
+ - 64
+ - `1k1k_tp8_conc64.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc64.yaml``
+ * - B200_NVL
+ - Min Latency
+ - 1024 / 8192
+ - 4
+ - `1k8k_tp1_conc4.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc4.yaml``
+ * - 2xB200_NVL
+ - Low Latency
+ - 1024 / 8192
+ - 4
+ - `1k8k_tp2_conc4.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc4.yaml``
+ * - 4xB200_NVL
+ - Low Latency
+ - 1024 / 8192
+ - 4
+ - `1k8k_tp4_conc4.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc4.yaml``
+ * - 8xB200_NVL
+ - Low Latency
+ - 1024 / 8192
+ - 4
+ - `1k8k_tp8_conc4.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc4.yaml``
+ * - B200_NVL
+ - Low Latency
+ - 1024 / 8192
+ - 8
+ - `1k8k_tp1_conc8.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc8.yaml``
+ * - 2xB200_NVL
+ - Low Latency
+ - 1024 / 8192
+ - 8
+ - `1k8k_tp2_conc8.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc8.yaml``
+ * - 4xB200_NVL
+ - Low Latency
+ - 1024 / 8192
+ - 8
+ - `1k8k_tp4_conc8.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc8.yaml``
+ * - 8xB200_NVL
+ - Low Latency
+ - 1024 / 8192
+ - 8
+ - `1k8k_tp8_conc8.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc8.yaml``
+ * - B200_NVL
+ - Low Latency
+ - 1024 / 8192
+ - 16
+ - `1k8k_tp1_conc16.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc16.yaml``
+ * - 2xB200_NVL
+ - Low Latency
+ - 1024 / 8192
+ - 16
+ - `1k8k_tp2_conc16.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc16.yaml``
+ * - 4xB200_NVL
+ - High Throughput
+ - 1024 / 8192
+ - 16
+ - `1k8k_tp4_conc16.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc16.yaml``
+ * - 8xB200_NVL
+ - High Throughput
+ - 1024 / 8192
+ - 16
+ - `1k8k_tp8_conc16.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc16.yaml``
+ * - B200_NVL
+ - High Throughput
+ - 1024 / 8192
+ - 32
+ - `1k8k_tp1_conc32.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc32.yaml``
+ * - 2xB200_NVL
+ - High Throughput
+ - 1024 / 8192
+ - 32
+ - `1k8k_tp2_conc32.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc32.yaml``
+ * - 4xB200_NVL
+ - High Throughput
+ - 1024 / 8192
+ - 32
+ - `1k8k_tp4_conc32.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc32.yaml``
+ * - 8xB200_NVL
+ - High Throughput
+ - 1024 / 8192
+ - 32
+ - `1k8k_tp8_conc32.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc32.yaml``
+ * - B200_NVL
+ - High Throughput
+ - 1024 / 8192
+ - 64
+ - `1k8k_tp1_conc64.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc64.yaml``
+ * - 2xB200_NVL
+ - High Throughput
+ - 1024 / 8192
+ - 64
+ - `1k8k_tp2_conc64.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc64.yaml``
+ * - 4xB200_NVL
+ - High Throughput
+ - 1024 / 8192
+ - 64
+ - `1k8k_tp4_conc64.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc64.yaml``
+ * - 8xB200_NVL
+ - Max Throughput
+ - 1024 / 8192
+ - 64
+ - `1k8k_tp8_conc64.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc64.yaml``
+ * - B200_NVL
+ - Min Latency
+ - 8192 / 1024
+ - 4
+ - `8k1k_tp1_conc4.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc4.yaml``
+ * - 2xB200_NVL
+ - Low Latency
+ - 8192 / 1024
+ - 4
+ - `8k1k_tp2_conc4.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc4.yaml``
+ * - 4xB200_NVL
+ - Low Latency
+ - 8192 / 1024
+ - 4
+ - `8k1k_tp4_conc4.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc4.yaml``
+ * - 8xB200_NVL
+ - Low Latency
+ - 8192 / 1024
+ - 4
+ - `8k1k_tp8_conc4.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc4.yaml``
+ * - B200_NVL
+ - Low Latency
+ - 8192 / 1024
+ - 8
+ - `8k1k_tp1_conc8.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc8.yaml``
+ * - 2xB200_NVL
+ - Low Latency
+ - 8192 / 1024
+ - 8
+ - `8k1k_tp2_conc8.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc8.yaml``
+ * - 4xB200_NVL
+ - Low Latency
+ - 8192 / 1024
+ - 8
+ - `8k1k_tp4_conc8.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc8.yaml``
+ * - 8xB200_NVL
+ - Low Latency
+ - 8192 / 1024
+ - 8
+ - `8k1k_tp8_conc8.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc8.yaml``
+ * - B200_NVL
+ - Low Latency
+ - 8192 / 1024
+ - 16
+ - `8k1k_tp1_conc16.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc16.yaml``
+ * - 2xB200_NVL
+ - Low Latency
+ - 8192 / 1024
+ - 16
+ - `8k1k_tp2_conc16.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc16.yaml``
+ * - 4xB200_NVL
+ - High Throughput
+ - 8192 / 1024
+ - 16
+ - `8k1k_tp4_conc16.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc16.yaml``
+ * - 8xB200_NVL
+ - High Throughput
+ - 8192 / 1024
+ - 16
+ - `8k1k_tp8_conc16.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc16.yaml``
+ * - B200_NVL
+ - High Throughput
+ - 8192 / 1024
+ - 32
+ - `8k1k_tp1_conc32.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc32.yaml``
+ * - 2xB200_NVL
+ - High Throughput
+ - 8192 / 1024
+ - 32
+ - `8k1k_tp2_conc32.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc32.yaml``
+ * - 4xB200_NVL
+ - High Throughput
+ - 8192 / 1024
+ - 32
+ - `8k1k_tp4_conc32.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc32.yaml``
+ * - 8xB200_NVL
+ - High Throughput
+ - 8192 / 1024
+ - 32
+ - `8k1k_tp8_conc32.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc32.yaml``
+ * - B200_NVL
+ - High Throughput
+ - 8192 / 1024
+ - 64
+ - `8k1k_tp1_conc64.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc64.yaml``
+ * - 2xB200_NVL
+ - High Throughput
+ - 8192 / 1024
+ - 64
+ - `8k1k_tp2_conc64.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc64.yaml``
+ * - 4xB200_NVL
+ - High Throughput
+ - 8192 / 1024
+ - 64
+ - `8k1k_tp4_conc64.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc64.yaml``
+ * - 8xB200_NVL
+ - Max Throughput
+ - 8192 / 1024
+ - 64
+ - `8k1k_tp8_conc64.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc64.yaml``
+ * - H200_SXM
+ - Min Latency
+ - 1024 / 1024
+ - 4
+ - `1k1k_tp1_conc4.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc4.yaml``
+ * - 2xH200_SXM
+ - Low Latency
+ - 1024 / 1024
+ - 4
+ - `1k1k_tp2_conc4.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc4.yaml``
+ * - 4xH200_SXM
+ - Low Latency
+ - 1024 / 1024
+ - 4
+ - `1k1k_tp4_conc4.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc4.yaml``
+ * - 8xH200_SXM
+ - Low Latency
+ - 1024 / 1024
+ - 4
+ - `1k1k_tp8_conc4.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc4.yaml``
+ * - H200_SXM
+ - Low Latency
+ - 1024 / 1024
+ - 8
+ - `1k1k_tp1_conc8.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc8.yaml``
+ * - 2xH200_SXM
+ - Low Latency
+ - 1024 / 1024
+ - 8
+ - `1k1k_tp2_conc8.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc8.yaml``
+ * - 4xH200_SXM
+ - Low Latency
+ - 1024 / 1024
+ - 8
+ - `1k1k_tp4_conc8.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc8.yaml``
+ * - 8xH200_SXM
+ - Low Latency
+ - 1024 / 1024
+ - 8
+ - `1k1k_tp8_conc8.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc8.yaml``
+ * - H200_SXM
+ - Low Latency
+ - 1024 / 1024
+ - 16
+ - `1k1k_tp1_conc16.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc16.yaml``
+ * - 2xH200_SXM
+ - Low Latency
+ - 1024 / 1024
+ - 16
+ - `1k1k_tp2_conc16.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc16.yaml``
+ * - 4xH200_SXM
+ - High Throughput
+ - 1024 / 1024
+ - 16
+ - `1k1k_tp4_conc16.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc16.yaml``
+ * - 8xH200_SXM
+ - High Throughput
+ - 1024 / 1024
+ - 16
+ - `1k1k_tp8_conc16.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc16.yaml``
+ * - H200_SXM
+ - High Throughput
+ - 1024 / 1024
+ - 32
+ - `1k1k_tp1_conc32.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc32.yaml``
+ * - 2xH200_SXM
+ - High Throughput
+ - 1024 / 1024
+ - 32
+ - `1k1k_tp2_conc32.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc32.yaml``
+ * - 4xH200_SXM
+ - High Throughput
+ - 1024 / 1024
+ - 32
+ - `1k1k_tp4_conc32.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc32.yaml``
+ * - 8xH200_SXM
+ - High Throughput
+ - 1024 / 1024
+ - 32
+ - `1k1k_tp8_conc32.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc32.yaml``
+ * - H200_SXM
+ - High Throughput
+ - 1024 / 1024
+ - 64
+ - `1k1k_tp1_conc64.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc64.yaml``
+ * - 2xH200_SXM
+ - High Throughput
+ - 1024 / 1024
+ - 64
+ - `1k1k_tp2_conc64.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc64.yaml``
+ * - 4xH200_SXM
+ - High Throughput
+ - 1024 / 1024
+ - 64
+ - `1k1k_tp4_conc64.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc64.yaml``
+ * - 8xH200_SXM
+ - Max Throughput
+ - 1024 / 1024
+ - 64
+ - `1k1k_tp8_conc64.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc64.yaml``
+ * - H200_SXM
+ - Min Latency
+ - 1024 / 8192
+ - 4
+ - `1k8k_tp1_conc4.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc4.yaml``
+ * - 2xH200_SXM
+ - Low Latency
+ - 1024 / 8192
+ - 4
+ - `1k8k_tp2_conc4.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc4.yaml``
+ * - 4xH200_SXM
+ - Low Latency
+ - 1024 / 8192
+ - 4
+ - `1k8k_tp4_conc4.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc4.yaml``
+ * - 8xH200_SXM
+ - Low Latency
+ - 1024 / 8192
+ - 4
+ - `1k8k_tp8_conc4.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc4.yaml``
+ * - H200_SXM
+ - Low Latency
+ - 1024 / 8192
+ - 8
+ - `1k8k_tp1_conc8.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc8.yaml``
+ * - 2xH200_SXM
+ - Low Latency
+ - 1024 / 8192
+ - 8
+ - `1k8k_tp2_conc8.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc8.yaml``
+ * - 4xH200_SXM
+ - Low Latency
+ - 1024 / 8192
+ - 8
+ - `1k8k_tp4_conc8.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc8.yaml``
+ * - 8xH200_SXM
+ - Low Latency
+ - 1024 / 8192
+ - 8
+ - `1k8k_tp8_conc8.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc8.yaml``
+ * - H200_SXM
+ - Low Latency
+ - 1024 / 8192
+ - 16
+ - `1k8k_tp1_conc16.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc16.yaml``
+ * - 2xH200_SXM
+ - Low Latency
+ - 1024 / 8192
+ - 16
+ - `1k8k_tp2_conc16.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc16.yaml``
+ * - 4xH200_SXM
+ - High Throughput
+ - 1024 / 8192
+ - 16
+ - `1k8k_tp4_conc16.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc16.yaml``
+ * - 8xH200_SXM
+ - High Throughput
+ - 1024 / 8192
+ - 16
+ - `1k8k_tp8_conc16.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc16.yaml``
+ * - H200_SXM
+ - High Throughput
+ - 1024 / 8192
+ - 32
+ - `1k8k_tp1_conc32.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc32.yaml``
+ * - 2xH200_SXM
+ - High Throughput
+ - 1024 / 8192
+ - 32
+ - `1k8k_tp2_conc32.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc32.yaml``
+ * - 4xH200_SXM
+ - High Throughput
+ - 1024 / 8192
+ - 32
+ - `1k8k_tp4_conc32.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc32.yaml``
+ * - 8xH200_SXM
+ - High Throughput
+ - 1024 / 8192
+ - 32
+ - `1k8k_tp8_conc32.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc32.yaml``
+ * - H200_SXM
+ - High Throughput
+ - 1024 / 8192
+ - 64
+ - `1k8k_tp1_conc64.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc64.yaml``
+ * - 2xH200_SXM
+ - High Throughput
+ - 1024 / 8192
+ - 64
+ - `1k8k_tp2_conc64.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc64.yaml``
+ * - 4xH200_SXM
+ - High Throughput
+ - 1024 / 8192
+ - 64
+ - `1k8k_tp4_conc64.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc64.yaml``
+ * - 8xH200_SXM
+ - Max Throughput
+ - 1024 / 8192
+ - 64
+ - `1k8k_tp8_conc64.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc64.yaml``
+ * - H200_SXM
+ - Min Latency
+ - 8192 / 1024
+ - 4
+ - `8k1k_tp1_conc4.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc4.yaml``
+ * - 2xH200_SXM
+ - Low Latency
+ - 8192 / 1024
+ - 4
+ - `8k1k_tp2_conc4.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc4.yaml``
+ * - 4xH200_SXM
+ - Low Latency
+ - 8192 / 1024
+ - 4
+ - `8k1k_tp4_conc4.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc4.yaml``
+ * - 8xH200_SXM
+ - Low Latency
+ - 8192 / 1024
+ - 4
+ - `8k1k_tp8_conc4.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc4.yaml``
+ * - H200_SXM
+ - Low Latency
+ - 8192 / 1024
+ - 8
+ - `8k1k_tp1_conc8.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc8.yaml``
+ * - 2xH200_SXM
+ - Low Latency
+ - 8192 / 1024
+ - 8
+ - `8k1k_tp2_conc8.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc8.yaml``
+ * - 4xH200_SXM
+ - Low Latency
+ - 8192 / 1024
+ - 8
+ - `8k1k_tp4_conc8.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc8.yaml``
+ * - 8xH200_SXM
+ - Low Latency
+ - 8192 / 1024
+ - 8
+ - `8k1k_tp8_conc8.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc8.yaml``
+ * - H200_SXM
+ - Low Latency
+ - 8192 / 1024
+ - 16
+ - `8k1k_tp1_conc16.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc16.yaml``
+ * - 2xH200_SXM
+ - Low Latency
+ - 8192 / 1024
+ - 16
+ - `8k1k_tp2_conc16.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc16.yaml``
+ * - 4xH200_SXM
+ - High Throughput
+ - 8192 / 1024
+ - 16
+ - `8k1k_tp4_conc16.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc16.yaml``
+ * - 8xH200_SXM
+ - High Throughput
+ - 8192 / 1024
+ - 16
+ - `8k1k_tp8_conc16.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc16.yaml``
+ * - H200_SXM
+ - High Throughput
+ - 8192 / 1024
+ - 32
+ - `8k1k_tp1_conc32.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc32.yaml``
+ * - 2xH200_SXM
+ - High Throughput
+ - 8192 / 1024
+ - 32
+ - `8k1k_tp2_conc32.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc32.yaml``
+ * - 4xH200_SXM
+ - High Throughput
+ - 8192 / 1024
+ - 32
+ - `8k1k_tp4_conc32.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc32.yaml``
+ * - 8xH200_SXM
+ - High Throughput
+ - 8192 / 1024
+ - 32
+ - `8k1k_tp8_conc32.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc32.yaml``
+ * - H200_SXM
+ - High Throughput
+ - 8192 / 1024
+ - 64
+ - `8k1k_tp1_conc64.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc64.yaml``
+ * - 2xH200_SXM
+ - High Throughput
+ - 8192 / 1024
+ - 64
+ - `8k1k_tp2_conc64.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc64.yaml``
+ * - 4xH200_SXM
+ - High Throughput
+ - 8192 / 1024
+ - 64
+ - `8k1k_tp4_conc64.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc64.yaml``
+ * - 8xH200_SXM
+ - Max Throughput
+ - 8192 / 1024
+ - 64
+ - `8k1k_tp8_conc64.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc64.yaml``
+
+.. end-openai/gpt-oss-120b
diff --git a/latest/_sources/deployment-guide/deployment-guide-for-deepseek-r1-on-trtllm.md.txt b/latest/_sources/deployment-guide/deployment-guide-for-deepseek-r1-on-trtllm.md.txt
index 55deeb94fe..a887ec24b9 100644
--- a/latest/_sources/deployment-guide/deployment-guide-for-deepseek-r1-on-trtllm.md.txt
+++ b/latest/_sources/deployment-guide/deployment-guide-for-deepseek-r1-on-trtllm.md.txt
@@ -47,7 +47,7 @@ docker run --rm -it \
-p 8000:8000 \
-v ~/.cache:/root/.cache:rw \
--name tensorrt_llm \
-nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc5 \
+nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc6 \
/bin/bash
```
@@ -66,7 +66,7 @@ We maintain YAML configuration files with recommended performance settings in th
```shell
TRTLLM_DIR=/app/tensorrt_llm # change as needed to match your environment
-EXTRA_LLM_API_FILE=${TRTLLM_DIR}/examples/configs/deepseek-r1-throughput.yaml
+EXTRA_LLM_API_FILE=${TRTLLM_DIR}/examples/configs/curated/deepseek-r1-throughput.yaml
```
Note: if you don't have access to the source code locally, you can manually create the YAML config file using the code in the dropdown below.
@@ -74,7 +74,7 @@ Note: if you don't have access to the source code locally, you can manually crea
````{admonition} Show code
:class: dropdown
-```{literalinclude} ../../../examples/configs/deepseek-r1-throughput.yaml
+```{literalinclude} ../../../examples/configs/curated/deepseek-r1-throughput.yaml
---
language: shell
prepend: |
@@ -90,7 +90,7 @@ To use the `DeepGEMM` MOE backend on B200/GB200, use this config instead:
```shell
TRTLLM_DIR=/app/tensorrt_llm # change as needed to match your environment
-EXTRA_LLM_API_FILE=${TRTLLM_DIR}/examples/configs/deepseek-r1-deepgemm.yaml
+EXTRA_LLM_API_FILE=${TRTLLM_DIR}/examples/configs/curated/deepseek-r1-deepgemm.yaml
```
Note: if you don't have access to the source code locally, you can manually create the YAML config file using the code in the dropdown below.
@@ -98,7 +98,7 @@ Note: if you don't have access to the source code locally, you can manually crea
````{admonition} Show code
:class: dropdown
-```{literalinclude} ../../../examples/configs/deepseek-r1-deepgemm.yaml
+```{literalinclude} ../../../examples/configs/curated/deepseek-r1-deepgemm.yaml
---
language: shell
prepend: |
@@ -154,7 +154,7 @@ These options provide control over TensorRT LLM's behavior and are set within th
#### `trust_remote_code`
- **Description:** Allows TensorRT LLM to download models and tokenizers from Hugging Face. This flag is passed directly to the Hugging Face API.
+* **Description:** Allows TensorRT LLM to download models and tokenizers from Hugging Face. This flag is passed directly to the Hugging Face API.
#### `kv_cache_config`
@@ -429,3 +429,23 @@ $$
$$
\text{TPS} = \frac{\text{Num Output Tokens}}{T_{last} - T_{first}}
$$
+
+## Preconfigured Recipes
+
+The following tables list recommended configurations from the comprehensive database for different performance profiles.
+
+```{eval-rst}
+.. include:: note_sections.rst
+ :start-after: .. start-note-traffic-patterns
+ :end-before: .. end-note-traffic-patterns
+
+.. include:: config_table.rst
+ :start-after: .. start-deepseek-ai/DeepSeek-R1-0528
+ :end-before: .. end-deepseek-ai/DeepSeek-R1-0528
+```
+
+```{eval-rst}
+.. include:: config_table.rst
+ :start-after: .. start-nvidia/DeepSeek-R1-0528-FP4-v2
+ :end-before: .. end-nvidia/DeepSeek-R1-0528-FP4-v2
+```
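+
+As a worked example (illustrative only; the full set of available configs is listed in the tables above), the database config filenames encode the traffic pattern and parallelism: `8k1k_tp8_conc16.yaml` targets ISL/OSL 8192/1024, appears to use a tensor-parallel size of 8 (matching 8xB200), and is tuned for a concurrency of 16. A typical launch with that config looks like:
+
+```shell
+TRTLLM_DIR=/app/tensorrt_llm  # change as needed to match your environment
+# Serve the NVFP4 DeepSeek-R1 checkpoint with the 8k1k, TP8, concurrency-16 database config on B200.
+trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 \
+  --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc16.yaml
+```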
diff --git a/latest/_sources/deployment-guide/deployment-guide-for-gpt-oss-on-trtllm.md.txt b/latest/_sources/deployment-guide/deployment-guide-for-gpt-oss-on-trtllm.md.txt
index ae34c5b3ce..cc30f55e98 100644
--- a/latest/_sources/deployment-guide/deployment-guide-for-gpt-oss-on-trtllm.md.txt
+++ b/latest/_sources/deployment-guide/deployment-guide-for-gpt-oss-on-trtllm.md.txt
@@ -43,7 +43,7 @@ docker run --rm -it \
-p 8000:8000 \
-v ~/.cache:/root/.cache:rw \
--name tensorrt_llm \
-nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc5 \
+nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc6 \
/bin/bash
```
@@ -64,7 +64,7 @@ For low-latency use cases:
```shell
TRTLLM_DIR=/app/tensorrt_llm # change as needed to match your environment
-EXTRA_LLM_API_FILE=${TRTLLM_DIR}/examples/configs/gpt-oss-120b-latency.yaml
+EXTRA_LLM_API_FILE=${TRTLLM_DIR}/examples/configs/curated/gpt-oss-120b-latency.yaml
```
Note: if you don't have access to the source code locally, you can manually create the YAML config file using the code in the dropdown below.
@@ -72,7 +72,7 @@ Note: if you don't have access to the source code locally, you can manually crea
````{admonition} Show code
:class: dropdown
-```{literalinclude} ../../../examples/configs/gpt-oss-120b-latency.yaml
+```{literalinclude} ../../../examples/configs/curated/gpt-oss-120b-latency.yaml
---
language: shell
prepend: |
@@ -88,7 +88,7 @@ For max-throughput use cases:
```shell
TRTLLM_DIR=/app/tensorrt_llm # change as needed to match your environment
-EXTRA_LLM_API_FILE=${TRTLLM_DIR}/examples/configs/gpt-oss-120b-throughput.yaml
+EXTRA_LLM_API_FILE=${TRTLLM_DIR}/examples/configs/curated/gpt-oss-120b-throughput.yaml
```
Note: if you don't have access to the source code locally, you can manually create the YAML config file using the code in the dropdown below.
@@ -96,7 +96,7 @@ Note: if you don't have access to the source code locally, you can manually crea
````{admonition} Show code
:class: dropdown
-```{literalinclude} ../../../examples/configs/gpt-oss-120b-throughput.yaml
+```{literalinclude} ../../../examples/configs/curated/gpt-oss-120b-throughput.yaml
---
language: shell
prepend: |
@@ -377,3 +377,17 @@ $$
$$
\text{TPS} = \frac{\text{Num Output Tokens}}{T_{last} - T_{first}}
$$
+
+## Preconfigured Recipes
+
+The following table lists recommended configurations from the comprehensive database for different performance profiles.
+
+```{eval-rst}
+.. include:: note_sections.rst
+ :start-after: .. start-note-traffic-patterns
+ :end-before: .. end-note-traffic-patterns
+
+.. include:: config_table.rst
+ :start-after: .. start-openai/gpt-oss-120b
+ :end-before: .. end-openai/gpt-oss-120b
+```
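+
+Once a server is running with one of these configs, a quick request against the OpenAI-compatible endpoint can confirm it is responding. The snippet below is a minimal sketch; it assumes the server is listening on localhost:8000, the port mapped in the Docker command above:
+
+```shell
+# Sanity-check the chat completions endpoint after the server reports it is ready.
+curl http://localhost:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+        "model": "openai/gpt-oss-120b",
+        "messages": [{"role": "user", "content": "Say hello in one sentence."}],
+        "max_tokens": 32
+      }'
+```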
diff --git a/latest/_sources/deployment-guide/deployment-guide-for-kimi-k2-thinking-on-trtllm.md.txt b/latest/_sources/deployment-guide/deployment-guide-for-kimi-k2-thinking-on-trtllm.md.txt
index d8ec17daff..391a72091d 100644
--- a/latest/_sources/deployment-guide/deployment-guide-for-kimi-k2-thinking-on-trtllm.md.txt
+++ b/latest/_sources/deployment-guide/deployment-guide-for-kimi-k2-thinking-on-trtllm.md.txt
@@ -306,3 +306,18 @@ Run `bench.sh` to begin a serving benchmark.
```shell
./bench.sh
```
+
+## Troubleshooting
+
+Since Kimi K2 Thinking has a larger weight size than other models, it is possible to see host OOM issues such as the following:
+
+```log
+Loading weights: 100%|█████████████████████| 1408/1408 [03:43<00:00, 6.30it/s]
+ 0: [12/04/2025-18:38:28] [TRT-LLM] [RANK 0] [I] moe_load_balancer finalizing model...
+ 1: [nvl72136-T14:452151:0:452151] Caught signal 7 (Bus error: nonexistent physical address)
+ 1: ==== backtrace (tid: 452151) ====
+ 1: 0 /usr/local/ucx//lib/libucs.so.0(ucs_handle_error+0x2cc) [0xffff9638274c]
+ 1: 1 /usr/local/ucx//lib/libucs.so.0(+0x328fc) [0xffff963828fc]
+ 1: 2 /usr/local/ucx//lib/libucs.so.0(+0x32c78) [0xffff96382c78]
+```
+This can be addressed by mounting `tmpfs:/dev/shm:size=640G` when launching the Docker container, which increases the shared memory (shm) size available to the container.
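+
+A minimal sketch of one way to do this uses Docker's `--shm-size` flag as an equivalent to the tmpfs mount described above; the remaining flags are assumed to match the `docker run` command used earlier in this guide:
+
+```shell
+# Illustrative: enlarge the container's shared-memory allocation to avoid host OOM
+# during weight loading, keeping the rest of the launch command unchanged.
+docker run --rm -it \
+  --shm-size=640g \
+  -p 8000:8000 \
+  -v ~/.cache:/root/.cache:rw \
+  --name tensorrt_llm \
+  nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc6 \
+  /bin/bash
+```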
diff --git a/latest/_sources/deployment-guide/deployment-guide-for-llama3.3-70b-on-trtllm.md.txt b/latest/_sources/deployment-guide/deployment-guide-for-llama3.3-70b-on-trtllm.md.txt
index d227b2f440..b45b7d2ffa 100644
--- a/latest/_sources/deployment-guide/deployment-guide-for-llama3.3-70b-on-trtllm.md.txt
+++ b/latest/_sources/deployment-guide/deployment-guide-for-llama3.3-70b-on-trtllm.md.txt
@@ -39,7 +39,7 @@ docker run --rm -it \
-p 8000:8000 \
-v ~/.cache:/root/.cache:rw \
--name tensorrt_llm \
-nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc5 \
+nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc6 \
/bin/bash
```
@@ -58,7 +58,7 @@ We maintain YAML configuration files with recommended performance settings in th
```shell
TRTLLM_DIR=/app/tensorrt_llm # change as needed to match your environment
-EXTRA_LLM_API_FILE=${TRTLLM_DIR}/examples/configs/llama-3.3-70b.yaml
+EXTRA_LLM_API_FILE=${TRTLLM_DIR}/examples/configs/curated/llama-3.3-70b.yaml
```
Note: if you don't have access to the source code locally, you can manually create the YAML config file using the code in the dropdown below.
@@ -66,7 +66,7 @@ Note: if you don't have access to the source code locally, you can manually crea
````{admonition} Show code
:class: dropdown
-```{literalinclude} ../../../examples/configs/llama-3.3-70b.yaml
+```{literalinclude} ../../../examples/configs/curated/llama-3.3-70b.yaml
---
language: shell
prepend: |
diff --git a/latest/_sources/deployment-guide/deployment-guide-for-llama4-scout-on-trtllm.md.txt b/latest/_sources/deployment-guide/deployment-guide-for-llama4-scout-on-trtllm.md.txt
index 509a5cf00f..3e70209b21 100644
--- a/latest/_sources/deployment-guide/deployment-guide-for-llama4-scout-on-trtllm.md.txt
+++ b/latest/_sources/deployment-guide/deployment-guide-for-llama4-scout-on-trtllm.md.txt
@@ -38,7 +38,7 @@ docker run --rm -it \
-p 8000:8000 \
-v ~/.cache:/root/.cache:rw \
--name tensorrt_llm \
-nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc5 \
+nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc6 \
/bin/bash
```
@@ -57,7 +57,7 @@ We maintain YAML configuration files with recommended performance settings in th
```shell
TRTLLM_DIR=/app/tensorrt_llm # change as needed to match your environment
-EXTRA_LLM_API_FILE=${TRTLLM_DIR}/examples/configs/llama-4-scout.yaml
+EXTRA_LLM_API_FILE=${TRTLLM_DIR}/examples/configs/curated/llama-4-scout.yaml
```
Note: if you don't have access to the source code locally, you can manually create the YAML config file using the code in the dropdown below.
@@ -65,7 +65,7 @@ Note: if you don't have access to the source code locally, you can manually crea
````{admonition} Show code
:class: dropdown
-```{literalinclude} ../../../examples/configs/llama-4-scout.yaml
+```{literalinclude} ../../../examples/configs/curated/llama-4-scout.yaml
---
language: shell
prepend: |
diff --git a/latest/_sources/deployment-guide/deployment-guide-for-qwen3-next-on-trtllm.md.txt b/latest/_sources/deployment-guide/deployment-guide-for-qwen3-next-on-trtllm.md.txt
index 246fc74a56..46bf724b71 100644
--- a/latest/_sources/deployment-guide/deployment-guide-for-qwen3-next-on-trtllm.md.txt
+++ b/latest/_sources/deployment-guide/deployment-guide-for-qwen3-next-on-trtllm.md.txt
@@ -35,7 +35,7 @@ We maintain YAML configuration files with recommended performance settings in th
```shell
TRTLLM_DIR=/app/tensorrt_llm # change as needed to match your environment
-EXTRA_LLM_API_FILE=${TRTLLM_DIR}/examples/configs/qwen3-next.yaml
+EXTRA_LLM_API_FILE=${TRTLLM_DIR}/examples/configs/curated/qwen3-next.yaml
```
Note: if you don't have access to the source code locally, you can manually create the YAML config file using the code in the dropdown below.
@@ -43,7 +43,7 @@ Note: if you don't have access to the source code locally, you can manually crea
````{admonition} Show code
:class: dropdown
-```{literalinclude} ../../../examples/configs/qwen3-next.yaml
+```{literalinclude} ../../../examples/configs/curated/qwen3-next.yaml
---
language: shell
prepend: |
diff --git a/latest/_sources/deployment-guide/deployment-guide-for-qwen3-on-trtllm.md.txt b/latest/_sources/deployment-guide/deployment-guide-for-qwen3-on-trtllm.md.txt
index 190740ebd8..894c6a1e63 100644
--- a/latest/_sources/deployment-guide/deployment-guide-for-qwen3-on-trtllm.md.txt
+++ b/latest/_sources/deployment-guide/deployment-guide-for-qwen3-on-trtllm.md.txt
@@ -40,7 +40,7 @@ We maintain YAML configuration files with recommended performance settings in th
```shell
TRTLLM_DIR=/app/tensorrt_llm # change as needed to match your environment
-EXTRA_LLM_API_FILE=${TRTLLM_DIR}/examples/configs/qwen3.yaml
+EXTRA_LLM_API_FILE=${TRTLLM_DIR}/examples/configs/curated/qwen3.yaml
```
Note: if you don't have access to the source code locally, you can manually create the YAML config file using the code in the dropdown below.
@@ -48,7 +48,7 @@ Note: if you don't have access to the source code locally, you can manually crea
````{admonition} Show code
:class: dropdown
-```{literalinclude} ../../../examples/configs/qwen3.yaml
+```{literalinclude} ../../../examples/configs/curated/qwen3.yaml
---
language: shell
prepend: |
diff --git a/latest/_sources/deployment-guide/index.rst.txt b/latest/_sources/deployment-guide/index.rst.txt
index ed7fd9c536..644a9d9ae9 100644
--- a/latest/_sources/deployment-guide/index.rst.txt
+++ b/latest/_sources/deployment-guide/index.rst.txt
@@ -6,15 +6,20 @@ Quick Start for Popular Models
The table below contains ``trtllm-serve`` commands that can be used to easily deploy popular models including DeepSeek-R1, gpt-oss, Llama 4, Qwen3, and more.
-We maintain LLM API configuration files for these models containing recommended performance settings in the `examples/configs `_ directory. The TensorRT LLM Docker container makes the config files available at ``/app/tensorrt_llm/examples/configs``, but you can customize this as needed:
+We maintain LLM API configuration files for these models containing recommended performance settings in two locations:
+
+* **Curated Examples**: `examples/configs/curated `_ - Hand-picked configurations for common scenarios.
+* **Comprehensive Database**: `examples/configs/database `_ - A more comprehensive set of known-good configurations for various GPUs and traffic patterns.
+
+The TensorRT LLM Docker container makes these config files available at ``/app/tensorrt_llm/examples/configs/curated`` and ``/app/tensorrt_llm/examples/configs/database`` respectively. You can reference them as needed:
.. code-block:: bash
export TRTLLM_DIR="/app/tensorrt_llm" # path to the TensorRT LLM repo in your local environment
-.. note::
-
- The configs here are specifically optimized for a target ISL/OSL (Input/Output Sequence Length) of 1024/1024. If your traffic pattern is different, you may benefit from additional tuning. In the future, we plan to provide more configs for a wider range of traffic patterns.
+.. include:: note_sections.rst
+ :start-after: .. start-note-quick-start-isl-osl
+ :end-before: .. end-note-quick-start-isl-osl
This table is designed to provide a straightforward starting point; for detailed model-specific deployment guides, check out the guides below.
@@ -30,53 +35,53 @@ This table is designed to provide a straightforward starting point; for detailed
* - `DeepSeek-R1 `_
- H100, H200
- Max Throughput
- - `deepseek-r1-throughput.yaml `_
- - ``trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/deepseek-r1-throughput.yaml``
+ - `deepseek-r1-throughput.yaml `_
+ - ``trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/curated/deepseek-r1-throughput.yaml``
* - `DeepSeek-R1 `_
- B200, GB200
- Max Throughput
- - `deepseek-r1-deepgemm.yaml `_
- - ``trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/deepseek-r1-deepgemm.yaml``
+ - `deepseek-r1-deepgemm.yaml `_
+ - ``trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/curated/deepseek-r1-deepgemm.yaml``
* - `DeepSeek-R1 (NVFP4) `_
- B200, GB200
- Max Throughput
- - `deepseek-r1-throughput.yaml `_
- - ``trtllm-serve nvidia/DeepSeek-R1-FP4 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/deepseek-r1-throughput.yaml``
+ - `deepseek-r1-throughput.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-FP4 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/curated/deepseek-r1-throughput.yaml``
* - `DeepSeek-R1 (NVFP4) `_
- B200, GB200
- Min Latency
- - `deepseek-r1-latency.yaml `_
- - ``trtllm-serve nvidia/DeepSeek-R1-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/deepseek-r1-latency.yaml``
+ - `deepseek-r1-latency.yaml `_
+ - ``trtllm-serve nvidia/DeepSeek-R1-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/curated/deepseek-r1-latency.yaml``
* - `gpt-oss-120b `_
- Any
- Max Throughput
- - `gpt-oss-120b-throughput.yaml `_
- - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/gpt-oss-120b-throughput.yaml``
+ - `gpt-oss-120b-throughput.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/curated/gpt-oss-120b-throughput.yaml``
* - `gpt-oss-120b `_
- Any
- Min Latency
- - `gpt-oss-120b-latency.yaml `_
- - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/gpt-oss-120b-latency.yaml``
+ - `gpt-oss-120b-latency.yaml `_
+ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/curated/gpt-oss-120b-latency.yaml``
* - `Qwen3-Next-80B-A3B-Thinking `_
- Any
- Max Throughput
- - `qwen3-next.yaml `_
- - ``trtllm-serve Qwen/Qwen3-Next-80B-A3B-Thinking --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/qwen3-next.yaml``
+ - `qwen3-next.yaml `_
+ - ``trtllm-serve Qwen/Qwen3-Next-80B-A3B-Thinking --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/curated/qwen3-next.yaml``
* - Qwen3 family (e.g. `Qwen3-30B-A3B `_)
- Any
- Max Throughput
- - `qwen3.yaml `_
- - ``trtllm-serve Qwen/Qwen3-30B-A3B --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/qwen3.yaml`` (swap to another Qwen3 model name as needed)
+ - `qwen3.yaml `_
+ - ``trtllm-serve Qwen/Qwen3-30B-A3B --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/curated/qwen3.yaml`` (swap to another Qwen3 model name as needed)
* - `Llama-3.3-70B (FP8) `_
- Any
- Max Throughput
- - `llama-3.3-70b.yaml `_
- - ``trtllm-serve nvidia/Llama-3.3-70B-Instruct-FP8 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/llama-3.3-70b.yaml``
+ - `llama-3.3-70b.yaml `_
+ - ``trtllm-serve nvidia/Llama-3.3-70B-Instruct-FP8 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/curated/llama-3.3-70b.yaml``
* - `Llama 4 Scout (FP8) `_
- Any
- Max Throughput
- - `llama-4-scout.yaml `_
- - ``trtllm-serve nvidia/Llama-4-Scout-17B-16E-Instruct-FP8 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/llama-4-scout.yaml``
+ - `llama-4-scout.yaml `_
+ - ``trtllm-serve nvidia/Llama-4-Scout-17B-16E-Instruct-FP8 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/curated/llama-4-scout.yaml``
Model-Specific Deployment Guides
---------------------------------
@@ -94,3 +99,10 @@ The deployment guides below provide more detailed instructions for serving speci
deployment-guide-for-qwen3-on-trtllm.md
deployment-guide-for-qwen3-next-on-trtllm.md
deployment-guide-for-kimi-k2-thinking-on-trtllm.md
+
+Comprehensive Configuration Database
+------------------------------------
+
+The table below lists all available pre-configured model scenarios in the TensorRT LLM configuration database. Each row represents a specific model, GPU, and performance profile combination with recommended request settings.
+
+.. include:: config_table.rst
diff --git a/latest/_sources/deployment-guide/note_sections.rst.txt b/latest/_sources/deployment-guide/note_sections.rst.txt
new file mode 100644
index 0000000000..4cd0d1c41d
--- /dev/null
+++ b/latest/_sources/deployment-guide/note_sections.rst.txt
@@ -0,0 +1,36 @@
+..
+ Reusable note sections for deployment guides.
+ Include specific notes using:
+
+ .. include:: note_sections.rst
+ :start-after: .. start-note-
+ :end-before: .. end-note-
+
+.. start-note-traffic-patterns
+
+.. note::
+
+ **Traffic Patterns**: The ISL (Input Sequence Length) and OSL (Output Sequence Length)
+ values in each configuration represent the **maximum supported values** for that config.
+ Requests exceeding these limits may result in errors.
+
+ To handle requests with input sequences **longer than the configured ISL**, add the following
+ to your config file:
+
+ .. code-block:: yaml
+
+ enable_chunked_prefill: true
+
+ This enables chunked prefill, which processes long input sequences in chunks rather than
+ requiring them to fit within a single prefill operation. Note that enabling chunked prefill
+ does **not** guarantee optimal performance—these configs are tuned for the specified ISL/OSL.
+
+.. end-note-traffic-patterns
+
+.. start-note-quick-start-isl-osl
+
+.. note::
+
+ The configs here are specifically optimized for a target ISL/OSL (Input/Output Sequence Length) of 1024/1024. If your traffic pattern is different, refer to the :ref:`Comprehensive Configuration Database` section below which covers a larger set of traffic patterns and performance profiles.
+
+.. end-note-quick-start-isl-osl
diff --git a/latest/_sources/developer-guide/perf-analysis.md.txt b/latest/_sources/developer-guide/perf-analysis.md.txt
index 3ac01d82ed..4aa26ecbda 100644
--- a/latest/_sources/developer-guide/perf-analysis.md.txt
+++ b/latest/_sources/developer-guide/perf-analysis.md.txt
@@ -72,10 +72,12 @@ Say we want to profile iterations 100 to 150 on a `trtllm-bench`/`trtllm-serve`
#!/bin/bash
# Prepare dataset for the benchmark
-python3 benchmarks/cpp/prepare_dataset.py \
- --tokenizer=${MODEL_PATH} \
- --stdout token-norm-dist --num-requests=${NUM_SAMPLES} \
- --input-mean=1000 --output-mean=1000 --input-stdev=0 --output-stdev=0 > /tmp/dataset.txt
+trtllm-bench --model ${MODEL_PATH} \
+ prepare-dataset \
+ --output dataset.txt \
+ token-norm-dist \
+ --num-requests=${NUM_SAMPLES} \
+ --input-mean=1000 --output-mean=1000 --input-stdev=0 --output-stdev=0
# Benchmark and profile
TLLM_PROFILE_START_STOP=100-150 nsys profile \
diff --git a/latest/_sources/developer-guide/perf-benchmarking.md.txt b/latest/_sources/developer-guide/perf-benchmarking.md.txt
index 4e4e3ca421..63bd9f6f8f 100644
--- a/latest/_sources/developer-guide/perf-benchmarking.md.txt
+++ b/latest/_sources/developer-guide/perf-benchmarking.md.txt
@@ -152,7 +152,7 @@ directory. For example, to generate a synthetic dataset of 1000 requests with a
128/128 for [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B), run:
```shell
-python benchmarks/cpp/prepare_dataset.py --stdout --tokenizer meta-llama/Llama-3.1-8B token-norm-dist --input-mean 128 --output-mean 128 --input-stdev 0 --output-stdev 0 --num-requests 1000 > /tmp/synthetic_128_128.txt
+trtllm-bench --model meta-llama/Llama-3.1-8B prepare-dataset --output /tmp/synthetic_128_128.txt token-norm-dist --input-mean 128 --output-mean 128 --input-stdev 0 --output-stdev 0 --num-requests 1000
```
### Running with the PyTorch Workflow
@@ -233,13 +233,13 @@ The PyTorch workflow supports benchmarking with LoRA (Low-Rank Adaptation) adapt
**Preparing LoRA Dataset**
-Use `prepare_dataset.py` with LoRA-specific options to generate requests with LoRA metadata:
+Use `trtllm-bench prepare-dataset` with LoRA-specific options to generate requests with LoRA metadata:
```shell
-python3 benchmarks/cpp/prepare_dataset.py \
- --stdout \
+trtllm-bench \
+ --model /path/to/tokenizer \
+ prepare-dataset \
--rand-task-id 0 1 \
- --tokenizer /path/to/tokenizer \
--lora-dir /path/to/loras \
token-norm-dist \
--num-requests 100 \
@@ -310,17 +310,18 @@ Each subdirectory should contain the LoRA adapter files for that specific task.
To benchmark multi-modal models with PyTorch workflow, you can follow the similar approach as above.
First, prepare the dataset:
-```python
-python ./benchmarks/cpp/prepare_dataset.py \
- --tokenizer Qwen/Qwen2-VL-2B-Instruct \
- --stdout \
- dataset \
+```bash
+trtllm-bench \
+ --model Qwen/Qwen2-VL-2B-Instruct \
+ prepare-dataset \
+ --output mm_data.jsonl \
+ real-dataset \
--dataset-name lmms-lab/MMMU \
--dataset-split test \
--dataset-image-key image \
--dataset-prompt-key question \
--num-requests 10 \
- --output-len-dist 128,5 > mm_data.jsonl
+ --output-len-dist 128,5
```
It will download the media files to `/tmp` directory and prepare the dataset with their paths. Note that the `prompt` fields are texts and not tokenized ids. This is due to the fact that
the `prompt` and the media (image/video) are processed by a preprocessor for multimodal files.
@@ -423,10 +424,10 @@ checkpoint. For the Llama-3.1 models, TensorRT LLM provides the following checkp
- [`nvidia/Llama-3.1-70B-Instruct-FP8`](https://huggingface.co/nvidia/Llama-3.1-70B-Instruct-FP8)
- [`nvidia/Llama-3.1-405B-Instruct-FP8`](https://huggingface.co/nvidia/Llama-3.1-405B-Instruct-FP8)
-To understand more about how to quantize your own checkpoints, refer to ModelOpt [documentation](https://nvidia.github.io/TensorRT-Model-Optimizer/deployment/1_tensorrt_llm.html).
+To understand more about how to quantize your own checkpoints, refer to ModelOpt [documentation](https://nvidia.github.io/Model-Optimizer/deployment/1_tensorrt_llm.html).
`trtllm-bench` utilizes the `hf_quant_config.json` file present in the pre-quantized checkpoints above. The configuration
-file is present in checkpoints quantized with [TensorRT Model Optimizer](https://github.com/NVIDIA/TensorRT-Model-Optimizer)
+file is present in checkpoints quantized with [Model Optimizer](https://github.com/NVIDIA/Model-Optimizer)
and describes the compute and KV cache quantization that checkpoint was compiled with. For example, from the checkpoints
above:
diff --git a/latest/_sources/developer-guide/perf-overview.md.txt b/latest/_sources/developer-guide/perf-overview.md.txt
index 0a144a58d4..aefa91fd43 100644
--- a/latest/_sources/developer-guide/perf-overview.md.txt
+++ b/latest/_sources/developer-guide/perf-overview.md.txt
@@ -21,7 +21,7 @@ and shows the throughput scenario under maximum load. The reported metric is `To
The performance numbers below were collected using the steps described in this document.
-Testing was performed on models with weights quantized using [ModelOpt](https://nvidia.github.io/TensorRT-Model-Optimizer/#) and published by NVIDIA on the [Model Optimizer HuggingFace Collection](https://huggingface.co/collections/nvidia/model-optimizer-66aa84f7966b3150262481a4).
+Testing was performed on models with weights quantized using [ModelOpt](https://nvidia.github.io/Model-Optimizer/#) and published by NVIDIA on the [Model Optimizer HuggingFace Collection](https://huggingface.co/collections/nvidia/model-optimizer-66aa84f7966b3150262481a4).
*(NEW for v1.0) RTX 6000 Pro Blackwell Server Edition Benchmarks:*
diff --git a/latest/_sources/examples/curl_chat_client.rst.txt b/latest/_sources/examples/curl_chat_client.rst.txt
index d3709ccd9c..f5a6ef236b 100644
--- a/latest/_sources/examples/curl_chat_client.rst.txt
+++ b/latest/_sources/examples/curl_chat_client.rst.txt
@@ -2,7 +2,7 @@ Curl Chat Client
================
Refer to the `trtllm-serve documentation `_ for starting a server.
-Source https://github.com/NVIDIA/TensorRT-LLM/blob/e4c707845ff58fcc0b1d87afb4dd0e64885c780a/examples/serve/curl_chat_client.sh.
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/serve/curl_chat_client.sh.
.. literalinclude:: ../../../examples/serve/curl_chat_client.sh
:lines: 1-11
diff --git a/latest/_sources/examples/curl_chat_client_for_multimodal.rst.txt b/latest/_sources/examples/curl_chat_client_for_multimodal.rst.txt
index 73760884c2..17e6340f42 100644
--- a/latest/_sources/examples/curl_chat_client_for_multimodal.rst.txt
+++ b/latest/_sources/examples/curl_chat_client_for_multimodal.rst.txt
@@ -2,7 +2,7 @@ Curl Chat Client For Multimodal
===============================
Refer to the `trtllm-serve documentation `_ for starting a server.
-Source https://github.com/NVIDIA/TensorRT-LLM/blob/e4c707845ff58fcc0b1d87afb4dd0e64885c780a/examples/serve/curl_chat_client_for_multimodal.sh.
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/serve/curl_chat_client_for_multimodal.sh.
.. literalinclude:: ../../../examples/serve/curl_chat_client_for_multimodal.sh
:lines: 1-88
diff --git a/latest/_sources/examples/curl_completion_client.rst.txt b/latest/_sources/examples/curl_completion_client.rst.txt
index c2f4e9a14e..b4ef6aa5d3 100644
--- a/latest/_sources/examples/curl_completion_client.rst.txt
+++ b/latest/_sources/examples/curl_completion_client.rst.txt
@@ -2,7 +2,7 @@ Curl Completion Client
======================
Refer to the `trtllm-serve documentation `_ for starting a server.
-Source https://github.com/NVIDIA/TensorRT-LLM/blob/e4c707845ff58fcc0b1d87afb4dd0e64885c780a/examples/serve/curl_completion_client.sh.
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/serve/curl_completion_client.sh.
.. literalinclude:: ../../../examples/serve/curl_completion_client.sh
:lines: 1-10
diff --git a/latest/_sources/examples/curl_responses_client.rst.txt b/latest/_sources/examples/curl_responses_client.rst.txt
new file mode 100644
index 0000000000..bcb3bcd62b
--- /dev/null
+++ b/latest/_sources/examples/curl_responses_client.rst.txt
@@ -0,0 +1,10 @@
+Curl Responses Client
+=====================
+Refer to the `trtllm-serve documentation `_ for starting a server.
+
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/serve/curl_responses_client.sh.
+
+.. literalinclude:: ../../../examples/serve/curl_responses_client.sh
+ :lines: 1-9
+ :language: bash
+ :linenos:
diff --git a/latest/_sources/examples/deepseek_r1_reasoning_parser.rst.txt b/latest/_sources/examples/deepseek_r1_reasoning_parser.rst.txt
index 4e0a039fe1..4121dcc52f 100644
--- a/latest/_sources/examples/deepseek_r1_reasoning_parser.rst.txt
+++ b/latest/_sources/examples/deepseek_r1_reasoning_parser.rst.txt
@@ -2,7 +2,7 @@ Deepseek R1 Reasoning Parser
============================
Refer to the `trtllm-serve documentation `_ for starting a server.
-Source https://github.com/NVIDIA/TensorRT-LLM/blob/e4c707845ff58fcc0b1d87afb4dd0e64885c780a/examples/serve/deepseek_r1_reasoning_parser.sh.
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/serve/deepseek_r1_reasoning_parser.sh.
.. literalinclude:: ../../../examples/serve/deepseek_r1_reasoning_parser.sh
:lines: 1-23
diff --git a/latest/_sources/examples/genai_perf_client.rst.txt b/latest/_sources/examples/genai_perf_client.rst.txt
index 4f222352aa..9bb9012949 100644
--- a/latest/_sources/examples/genai_perf_client.rst.txt
+++ b/latest/_sources/examples/genai_perf_client.rst.txt
@@ -2,7 +2,7 @@ Genai Perf Client
=================
Refer to the `trtllm-serve documentation `_ for starting a server.
-Source https://github.com/NVIDIA/TensorRT-LLM/blob/e4c707845ff58fcc0b1d87afb4dd0e64885c780a/examples/serve/genai_perf_client.sh.
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/serve/genai_perf_client.sh.
.. literalinclude:: ../../../examples/serve/genai_perf_client.sh
:lines: 1-16
diff --git a/latest/_sources/examples/genai_perf_client_for_multimodal.rst.txt b/latest/_sources/examples/genai_perf_client_for_multimodal.rst.txt
index 6ae821dace..aa6f66eace 100644
--- a/latest/_sources/examples/genai_perf_client_for_multimodal.rst.txt
+++ b/latest/_sources/examples/genai_perf_client_for_multimodal.rst.txt
@@ -2,7 +2,7 @@ Genai Perf Client For Multimodal
================================
Refer to the `trtllm-serve documentation `_ for starting a server.
-Source https://github.com/NVIDIA/TensorRT-LLM/blob/e4c707845ff58fcc0b1d87afb4dd0e64885c780a/examples/serve/genai_perf_client_for_multimodal.sh.
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/serve/genai_perf_client_for_multimodal.sh.
.. literalinclude:: ../../../examples/serve/genai_perf_client_for_multimodal.sh
:lines: 1-19
diff --git a/latest/_sources/examples/llm_guided_decoding.rst.txt b/latest/_sources/examples/llm_guided_decoding.rst.txt
index c7a50512da..c1c9622871 100644
--- a/latest/_sources/examples/llm_guided_decoding.rst.txt
+++ b/latest/_sources/examples/llm_guided_decoding.rst.txt
@@ -1,6 +1,6 @@
Generate text with guided decoding
==================================
-Source https://github.com/NVIDIA/TensorRT-LLM/blob/e4c707845ff58fcc0b1d87afb4dd0e64885c780a/examples/llm-api/llm_guided_decoding.py.
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/llm-api/llm_guided_decoding.py.
.. literalinclude:: ../../../examples/llm-api/llm_guided_decoding.py
:lines: 4-47
diff --git a/latest/_sources/examples/llm_inference.rst.txt b/latest/_sources/examples/llm_inference.rst.txt
index be80e456eb..a0379d8bf0 100644
--- a/latest/_sources/examples/llm_inference.rst.txt
+++ b/latest/_sources/examples/llm_inference.rst.txt
@@ -1,6 +1,6 @@
Generate text
=============
-Source https://github.com/NVIDIA/TensorRT-LLM/blob/e4c707845ff58fcc0b1d87afb4dd0e64885c780a/examples/llm-api/llm_inference.py.
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/llm-api/llm_inference.py.
.. literalinclude:: ../../../examples/llm-api/llm_inference.py
:lines: 4-35
diff --git a/latest/_sources/examples/llm_inference_async.rst.txt b/latest/_sources/examples/llm_inference_async.rst.txt
index f7ff40a646..3da36720c2 100644
--- a/latest/_sources/examples/llm_inference_async.rst.txt
+++ b/latest/_sources/examples/llm_inference_async.rst.txt
@@ -1,6 +1,6 @@
Generate text asynchronously
============================
-Source https://github.com/NVIDIA/TensorRT-LLM/blob/e4c707845ff58fcc0b1d87afb4dd0e64885c780a/examples/llm-api/llm_inference_async.py.
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/llm-api/llm_inference_async.py.
.. literalinclude:: ../../../examples/llm-api/llm_inference_async.py
:lines: 4-43
diff --git a/latest/_sources/examples/llm_inference_async_streaming.rst.txt b/latest/_sources/examples/llm_inference_async_streaming.rst.txt
index 0736586f2f..5d4711e145 100644
--- a/latest/_sources/examples/llm_inference_async_streaming.rst.txt
+++ b/latest/_sources/examples/llm_inference_async_streaming.rst.txt
@@ -1,6 +1,6 @@
Generate text in streaming
==========================
-Source https://github.com/NVIDIA/TensorRT-LLM/blob/e4c707845ff58fcc0b1d87afb4dd0e64885c780a/examples/llm-api/llm_inference_async_streaming.py.
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/llm-api/llm_inference_async_streaming.py.
.. literalinclude:: ../../../examples/llm-api/llm_inference_async_streaming.py
:lines: 4-64
diff --git a/latest/_sources/examples/llm_inference_distributed.rst.txt b/latest/_sources/examples/llm_inference_distributed.rst.txt
index a04aa99313..07cc8963df 100644
--- a/latest/_sources/examples/llm_inference_distributed.rst.txt
+++ b/latest/_sources/examples/llm_inference_distributed.rst.txt
@@ -1,6 +1,6 @@
Distributed LLM Generation
==========================
-Source https://github.com/NVIDIA/TensorRT-LLM/blob/e4c707845ff58fcc0b1d87afb4dd0e64885c780a/examples/llm-api/llm_inference_distributed.py.
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/llm-api/llm_inference_distributed.py.
.. literalinclude:: ../../../examples/llm-api/llm_inference_distributed.py
:lines: 4-44
diff --git a/latest/_sources/examples/llm_kv_cache_connector.rst.txt b/latest/_sources/examples/llm_kv_cache_connector.rst.txt
index 0a150c4a36..32b443ae33 100644
--- a/latest/_sources/examples/llm_kv_cache_connector.rst.txt
+++ b/latest/_sources/examples/llm_kv_cache_connector.rst.txt
@@ -1,6 +1,6 @@
KV Cache Connector
==================
-Source https://github.com/NVIDIA/TensorRT-LLM/blob/e4c707845ff58fcc0b1d87afb4dd0e64885c780a/examples/llm-api/llm_kv_cache_connector.py.
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/llm-api/llm_kv_cache_connector.py.
.. literalinclude:: ../../../examples/llm-api/llm_kv_cache_connector.py
:lines: 4-326
diff --git a/latest/_sources/examples/llm_kv_cache_offloading.rst.txt b/latest/_sources/examples/llm_kv_cache_offloading.rst.txt
index a64445a962..5ae7bb74b1 100644
--- a/latest/_sources/examples/llm_kv_cache_offloading.rst.txt
+++ b/latest/_sources/examples/llm_kv_cache_offloading.rst.txt
@@ -1,6 +1,6 @@
KV Cache Offloading
===================
-Source https://github.com/NVIDIA/TensorRT-LLM/blob/e4c707845ff58fcc0b1d87afb4dd0e64885c780a/examples/llm-api/llm_kv_cache_offloading.py.
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/llm-api/llm_kv_cache_offloading.py.
.. literalinclude:: ../../../examples/llm-api/llm_kv_cache_offloading.py
:lines: 4-134
diff --git a/latest/_sources/examples/llm_logits_processor.rst.txt b/latest/_sources/examples/llm_logits_processor.rst.txt
index b739b44ca9..e2c401f98b 100644
--- a/latest/_sources/examples/llm_logits_processor.rst.txt
+++ b/latest/_sources/examples/llm_logits_processor.rst.txt
@@ -1,6 +1,6 @@
Control generated text using logits processor
=============================================
-Source https://github.com/NVIDIA/TensorRT-LLM/blob/e4c707845ff58fcc0b1d87afb4dd0e64885c780a/examples/llm-api/llm_logits_processor.py.
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/llm-api/llm_logits_processor.py.
.. literalinclude:: ../../../examples/llm-api/llm_logits_processor.py
:lines: 4-128
diff --git a/latest/_sources/examples/llm_mgmn_llm_distributed.rst.txt b/latest/_sources/examples/llm_mgmn_llm_distributed.rst.txt
index 0a84a19a28..fbaaae9489 100644
--- a/latest/_sources/examples/llm_mgmn_llm_distributed.rst.txt
+++ b/latest/_sources/examples/llm_mgmn_llm_distributed.rst.txt
@@ -1,6 +1,6 @@
Run LLM-API with pytorch backend on Slurm
=========================================
-Source https://github.com/NVIDIA/TensorRT-LLM/blob/e4c707845ff58fcc0b1d87afb4dd0e64885c780a/examples/llm-api/llm_mgmn_llm_distributed.sh.
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/llm-api/llm_mgmn_llm_distributed.sh.
.. literalinclude:: ../../../examples/llm-api/llm_mgmn_llm_distributed.sh
:lines: 1-48,52-94
diff --git a/latest/_sources/examples/llm_mgmn_trtllm_bench.rst.txt b/latest/_sources/examples/llm_mgmn_trtllm_bench.rst.txt
index ddfa9f47ca..bb9f5bfdb7 100644
--- a/latest/_sources/examples/llm_mgmn_trtllm_bench.rst.txt
+++ b/latest/_sources/examples/llm_mgmn_trtllm_bench.rst.txt
@@ -1,8 +1,8 @@
Run trtllm-bench with pytorch backend on Slurm
==============================================
-Source https://github.com/NVIDIA/TensorRT-LLM/blob/e4c707845ff58fcc0b1d87afb4dd0e64885c780a/examples/llm-api/llm_mgmn_trtllm_bench.sh.
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/llm-api/llm_mgmn_trtllm_bench.sh.
.. literalinclude:: ../../../examples/llm-api/llm_mgmn_trtllm_bench.sh
- :lines: 1-46,50-131
+ :lines: 1-46,50-130
:language: bash
:linenos:
diff --git a/latest/_sources/examples/llm_mgmn_trtllm_serve.rst.txt b/latest/_sources/examples/llm_mgmn_trtllm_serve.rst.txt
index 18e6c10c8c..d3ebb95460 100644
--- a/latest/_sources/examples/llm_mgmn_trtllm_serve.rst.txt
+++ b/latest/_sources/examples/llm_mgmn_trtllm_serve.rst.txt
@@ -1,6 +1,6 @@
Run trtllm-serve with pytorch backend on Slurm
==============================================
-Source https://github.com/NVIDIA/TensorRT-LLM/blob/e4c707845ff58fcc0b1d87afb4dd0e64885c780a/examples/llm-api/llm_mgmn_trtllm_serve.sh.
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/llm-api/llm_mgmn_trtllm_serve.sh.
.. literalinclude:: ../../../examples/llm-api/llm_mgmn_trtllm_serve.sh
:lines: 1-46,50-92
diff --git a/latest/_sources/examples/llm_multilora.rst.txt b/latest/_sources/examples/llm_multilora.rst.txt
index b0f9fdf5ec..5a4ef4786d 100644
--- a/latest/_sources/examples/llm_multilora.rst.txt
+++ b/latest/_sources/examples/llm_multilora.rst.txt
@@ -1,6 +1,6 @@
Generate text with multiple LoRA adapters
=========================================
-Source https://github.com/NVIDIA/TensorRT-LLM/blob/e4c707845ff58fcc0b1d87afb4dd0e64885c780a/examples/llm-api/llm_multilora.py.
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/llm-api/llm_multilora.py.
.. literalinclude:: ../../../examples/llm-api/llm_multilora.py
:lines: 4-89
diff --git a/latest/_sources/examples/llm_runtime.rst.txt b/latest/_sources/examples/llm_runtime.rst.txt
index c7405bcbe5..b5c67ea9d7 100644
--- a/latest/_sources/examples/llm_runtime.rst.txt
+++ b/latest/_sources/examples/llm_runtime.rst.txt
@@ -1,6 +1,6 @@
Runtime Configuration Examples
==============================
-Source https://github.com/NVIDIA/TensorRT-LLM/blob/e4c707845ff58fcc0b1d87afb4dd0e64885c780a/examples/llm-api/llm_runtime.py.
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/llm-api/llm_runtime.py.
.. literalinclude:: ../../../examples/llm-api/llm_runtime.py
:lines: 4-144
diff --git a/latest/_sources/examples/llm_sampling.rst.txt b/latest/_sources/examples/llm_sampling.rst.txt
index bc4c60a7ce..050450c330 100644
--- a/latest/_sources/examples/llm_sampling.rst.txt
+++ b/latest/_sources/examples/llm_sampling.rst.txt
@@ -1,6 +1,6 @@
Sampling Techniques Showcase
============================
-Source https://github.com/NVIDIA/TensorRT-LLM/blob/e4c707845ff58fcc0b1d87afb4dd0e64885c780a/examples/llm-api/llm_sampling.py.
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/llm-api/llm_sampling.py.
.. literalinclude:: ../../../examples/llm-api/llm_sampling.py
:lines: 4-248
diff --git a/latest/_sources/examples/llm_sparse_attention.rst.txt b/latest/_sources/examples/llm_sparse_attention.rst.txt
index 1c398bb1f0..c13f175d1e 100644
--- a/latest/_sources/examples/llm_sparse_attention.rst.txt
+++ b/latest/_sources/examples/llm_sparse_attention.rst.txt
@@ -1,6 +1,6 @@
Sparse Attention
================
-Source https://github.com/NVIDIA/TensorRT-LLM/blob/e4c707845ff58fcc0b1d87afb4dd0e64885c780a/examples/llm-api/llm_sparse_attention.py.
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/llm-api/llm_sparse_attention.py.
.. literalinclude:: ../../../examples/llm-api/llm_sparse_attention.py
:lines: 4-229
diff --git a/latest/_sources/examples/llm_speculative_decoding.rst.txt b/latest/_sources/examples/llm_speculative_decoding.rst.txt
index 689d6af530..dbfca2fb58 100644
--- a/latest/_sources/examples/llm_speculative_decoding.rst.txt
+++ b/latest/_sources/examples/llm_speculative_decoding.rst.txt
@@ -1,6 +1,6 @@
Speculative Decoding
====================
-Source https://github.com/NVIDIA/TensorRT-LLM/blob/e4c707845ff58fcc0b1d87afb4dd0e64885c780a/examples/llm-api/llm_speculative_decoding.py.
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/llm-api/llm_speculative_decoding.py.
.. literalinclude:: ../../../examples/llm-api/llm_speculative_decoding.py
:lines: 4-95
diff --git a/latest/_sources/examples/openai_chat_client.rst.txt b/latest/_sources/examples/openai_chat_client.rst.txt
index 29cf974ab0..bc25fbfefb 100644
--- a/latest/_sources/examples/openai_chat_client.rst.txt
+++ b/latest/_sources/examples/openai_chat_client.rst.txt
@@ -2,7 +2,7 @@ OpenAI Chat Client
==================
Refer to the `trtllm-serve documentation `_ for starting a server.
-Source https://github.com/NVIDIA/TensorRT-LLM/blob/e4c707845ff58fcc0b1d87afb4dd0e64885c780a/examples/serve/openai_chat_client.py.
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/serve/openai_chat_client.py.
.. literalinclude:: ../../../examples/serve/openai_chat_client.py
:lines: 2-21
diff --git a/latest/_sources/examples/openai_chat_client_for_multimodal.rst.txt b/latest/_sources/examples/openai_chat_client_for_multimodal.rst.txt
index b3fb0a07bc..9eb49504d9 100644
--- a/latest/_sources/examples/openai_chat_client_for_multimodal.rst.txt
+++ b/latest/_sources/examples/openai_chat_client_for_multimodal.rst.txt
@@ -2,7 +2,7 @@ OpenAI Chat Client for Multimodal
=================================
Refer to the `trtllm-serve documentation `_ for starting a server.
-Source https://github.com/NVIDIA/TensorRT-LLM/blob/e4c707845ff58fcc0b1d87afb4dd0e64885c780a/examples/serve/openai_chat_client_for_multimodal.py.
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/serve/openai_chat_client_for_multimodal.py.
.. literalinclude:: ../../../examples/serve/openai_chat_client_for_multimodal.py
:lines: 2-129
diff --git a/latest/_sources/examples/openai_completion_client.rst.txt b/latest/_sources/examples/openai_completion_client.rst.txt
index 7b60afc04d..54a9fac182 100644
--- a/latest/_sources/examples/openai_completion_client.rst.txt
+++ b/latest/_sources/examples/openai_completion_client.rst.txt
@@ -2,7 +2,7 @@ OpenAI Completion Client
========================
Refer to the `trtllm-serve documentation `_ for starting a server.
-Source https://github.com/NVIDIA/TensorRT-LLM/blob/e4c707845ff58fcc0b1d87afb4dd0e64885c780a/examples/serve/openai_completion_client.py.
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/serve/openai_completion_client.py.
.. literalinclude:: ../../../examples/serve/openai_completion_client.py
:lines: 2-15
diff --git a/latest/_sources/examples/openai_completion_client_for_lora.rst.txt b/latest/_sources/examples/openai_completion_client_for_lora.rst.txt
index 4eabf04fea..121ff107e2 100644
--- a/latest/_sources/examples/openai_completion_client_for_lora.rst.txt
+++ b/latest/_sources/examples/openai_completion_client_for_lora.rst.txt
@@ -2,7 +2,7 @@ Openai Completion Client For Lora
=================================
Refer to the `trtllm-serve documentation `_ for starting a server.
-Source https://github.com/NVIDIA/TensorRT-LLM/blob/e4c707845ff58fcc0b1d87afb4dd0e64885c780a/examples/serve/openai_completion_client_for_lora.py.
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/serve/openai_completion_client_for_lora.py.
.. literalinclude:: ../../../examples/serve/openai_completion_client_for_lora.py
:lines: 1-30
diff --git a/latest/_sources/examples/openai_completion_client_json_schema.rst.txt b/latest/_sources/examples/openai_completion_client_json_schema.rst.txt
index 8ed397f1cd..1eee39507d 100644
--- a/latest/_sources/examples/openai_completion_client_json_schema.rst.txt
+++ b/latest/_sources/examples/openai_completion_client_json_schema.rst.txt
@@ -2,7 +2,7 @@ OpenAI Completion Client with JSON Schema
=========================================
Refer to the `trtllm-serve documentation `_ for starting a server.
-Source https://github.com/NVIDIA/TensorRT-LLM/blob/e4c707845ff58fcc0b1d87afb4dd0e64885c780a/examples/serve/openai_completion_client_json_schema.py.
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/serve/openai_completion_client_json_schema.py.
.. literalinclude:: ../../../examples/serve/openai_completion_client_json_schema.py
:lines: 2-52
diff --git a/latest/_sources/examples/openai_responses_client.rst.txt b/latest/_sources/examples/openai_responses_client.rst.txt
new file mode 100644
index 0000000000..f8b4c62bc5
--- /dev/null
+++ b/latest/_sources/examples/openai_responses_client.rst.txt
@@ -0,0 +1,10 @@
+OpenAI Responses Client
+=======================
+Refer to the `trtllm-serve documentation `_ for starting a server.
+
+Source https://github.com/NVIDIA/TensorRT-LLM/blob/9ba14263db0045ed3fa0860f949b5ce320107eb3/examples/serve/openai_responses_client.py.
+
+.. literalinclude:: ../../../examples/serve/openai_responses_client.py
+ :lines: 2-15
+ :language: python
+ :linenos:
diff --git a/latest/_sources/examples/trtllm_serve_examples.rst.txt b/latest/_sources/examples/trtllm_serve_examples.rst.txt
index f39dfcee67..e61fd0e9ff 100644
--- a/latest/_sources/examples/trtllm_serve_examples.rst.txt
+++ b/latest/_sources/examples/trtllm_serve_examples.rst.txt
@@ -10,6 +10,7 @@ Online Serving Examples
curl_chat_client
curl_chat_client_for_multimodal
curl_completion_client
+ curl_responses_client
deepseek_r1_reasoning_parser
genai_perf_client
genai_perf_client_for_multimodal
@@ -18,4 +19,5 @@ Online Serving Examples
openai_completion_client
openai_completion_client_for_lora
openai_completion_client_json_schema
+ openai_responses_client
diff --git a/latest/_sources/features/auto_deploy/support_matrix.md.txt b/latest/_sources/features/auto_deploy/support_matrix.md.txt
index 26c07b308b..fec6d841af 100644
--- a/latest/_sources/features/auto_deploy/support_matrix.md.txt
+++ b/latest/_sources/features/auto_deploy/support_matrix.md.txt
@@ -120,7 +120,7 @@ Optimize attention operations with different attention kernel implementations:
### Precision Support
-AutoDeploy supports models with various precision formats, including quantized checkpoints generated by [`TensorRT-Model-Optimizer`](https://github.com/NVIDIA/TensorRT-Model-Optimizer).
+AutoDeploy supports models with various precision formats, including quantized checkpoints generated by [`Model-Optimizer`](https://github.com/NVIDIA/Model-Optimizer).
**Supported precision types include:**
diff --git a/latest/_sources/features/quantization.md.txt b/latest/_sources/features/quantization.md.txt
index 8a0e160529..7998f1c03a 100644
--- a/latest/_sources/features/quantization.md.txt
+++ b/latest/_sources/features/quantization.md.txt
@@ -11,6 +11,7 @@ TensorRT LLM offers a variety of quantization recipes to optimize LLM inference.
* FP8 Block Scaling
* FP8 Rowwise
* FP8 KV Cache
+* NVFP4 KV Cache
* W4A16 GPTQ
* W4A8 GPTQ
* W4A16 AWQ
@@ -23,7 +24,7 @@ The default PyTorch backend supports FP4 and FP8 quantization on the latest Blac
### Running Pre-quantized Models
-TensorRT LLM can directly run [pre-quantized models](https://huggingface.co/collections/nvidia/model-optimizer-66aa84f7966b3150262481a4) generated with the [NVIDIA TensorRT Model Optimizer](https://github.com/NVIDIA/TensorRT-Model-Optimizer).
+TensorRT LLM can directly run [pre-quantized models](https://huggingface.co/collections/nvidia/model-optimizer-66aa84f7966b3150262481a4) generated with the [NVIDIA Model Optimizer](https://github.com/NVIDIA/Model-Optimizer).
```python
from tensorrt_llm import LLM
@@ -47,6 +48,20 @@ llm = LLM(model='/path/to/model',
llm.generate("Hello, my name is")
```
+#### NVFP4 KV Cache
+
+To enable the NVFP4 KV cache, the model must first be quantized offline with ModelOpt; follow the instructions in the "Offline Quantization with ModelOpt" section below.
+Once quantization is complete, enable the NVFP4 KV cache as follows:
+
+```python
+from tensorrt_llm import LLM
+from tensorrt_llm.llmapi import KvCacheConfig
+llm = LLM(model='/path/to/model',
+ kv_cache_config=KvCacheConfig(dtype='nvfp4'))
+llm.generate("Hello, my name is")
+```
+
+
### Offline Quantization with ModelOpt
If a pre-quantized model is not available on the [Hugging Face Hub](https://huggingface.co/collections/nvidia/model-optimizer-66aa84f7966b3150262481a4), you can quantize it offline using ModelOpt.
@@ -54,35 +69,47 @@ If a pre-quantized model is not available on the [Hugging Face Hub](https://hugg
Follow this step-by-step guide to quantize a model:
```bash
-git clone https://github.com/NVIDIA/TensorRT-Model-Optimizer.git
-cd TensorRT-Model-Optimizer/examples/llm_ptq
-scripts/huggingface_example.sh --model --quant fp8 --export_fmt hf
+git clone https://github.com/NVIDIA/Model-Optimizer.git
+cd Model-Optimizer/examples/llm_ptq
+scripts/huggingface_example.sh --model --quant fp8
```
+#### NVFP4 KV Cache
+
+To generate the checkpoint for NVFP4 KV cache:
+
+```bash
+git clone https://github.com/NVIDIA/Model-Optimizer.git
+cd Model-Optimizer/examples/llm_ptq
+scripts/huggingface_example.sh --model --quant fp8 --kv_cache_quant nvfp4
+```
+
+Note that TRT-LLM currently supports only FP8 weight/activation quantization when the NVFP4 KV cache is enabled, so `--quant fp8` is required here.
+
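For reference, a minimal end-to-end sketch of loading such a checkpoint (the checkpoint directory name below is illustrative, not the exact path exported by the ModelOpt script) could look like:

```python
from tensorrt_llm import LLM
from tensorrt_llm.llmapi import KvCacheConfig

# Point LLM at the FP8 checkpoint produced by the offline quantization step
# above (directory name is illustrative) and request the NVFP4 KV cache.
llm = LLM(model='/path/to/exported_fp8_checkpoint',
          kv_cache_config=KvCacheConfig(dtype='nvfp4'))
print(llm.generate("Hello, my name is"))
```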
## Model Supported Matrix
-| Model | NVFP4 | MXFP4 | FP8(per tensor)| FP8(block scaling) | FP8(rowwise) | FP8 KV Cache |W4A8 AWQ | W4A16 AWQ | W4A8 GPTQ | W4A16 GPTQ |
-| :------------- | :---: | :---: | :---: | :---: | :---: | :---: | :-------: | :-------: | :--------: | :--------: |
-| BERT | . | . | . | . | . | Y | . | . | . | . |
-| DeepSeek-R1 | Y | . | . | Y | . | Y | . | . | . | . |
-| EXAONE | . | . | Y | . | . | Y | Y | Y | . | . |
-| Gemma 3 | . | . | Y | . | . | Y | Y | Y | . | . |
-| GPT-OSS | . | Y | . | . | . | Y | . | . | . | . |
-| LLaMA | Y | . | Y | . | . | Y | . | Y | . | Y |
-| LLaMA-v2 | Y | . | Y | . | . | Y | Y | Y | . | Y |
-| LLaMA 3 | . | . | . | . | Y | Y | Y | . | . | . |
-| LLaMA 4 | Y | . | Y | . | . | Y | . | . | . | . |
-| Mistral | . | . | Y | . | . | Y | . | Y | . | . |
-| Mixtral | Y | . | Y | . | . | Y | . | . | . | . |
-| Phi | . | . | . | . | . | Y | Y | . | . | . |
-| Qwen | . | . | . | . | . | Y | Y | Y | . | Y |
-| Qwen-2/2.5 | Y | . | Y | . | . | Y | Y | Y | . | Y |
-| Qwen-3 | Y | . | Y | . | . | Y | . | Y | . | Y |
-| BLIP2-OPT | . | . | . | . | . | Y | . | . | . | . |
-| BLIP2-T5 | . | . | . | . | . | Y | . | . | . | . |
-| LLaVA | . | . | Y | . | . | Y | . | Y | . | Y |
-| VILA | . | . | Y | . | . | Y | . | Y | . | Y |
-| Nougat | . | . | . | . | . | Y | . | . | . | . |
+| Model | NVFP4 | MXFP4 | FP8(per tensor)| FP8(block scaling) | FP8(rowwise) | FP8 KV Cache | NVFP4 KV Cache | W4A8 AWQ | W4A16 AWQ | W4A8 GPTQ | W4A16 GPTQ |
+| :------------- | :---: | :---: | :---: | :---: | :---: | :---: |:---:| :-------: | :-------: | :--------: | :--------: |
+| BERT | . | . | . | . | . | Y | . | . | . | . | . |
+| DeepSeek-R1 | Y | . | . | Y | . | Y | . | . | . | . | . |
+| EXAONE | . | . | Y | . | . | Y | . | Y | Y | . | . |
+| Gemma 3 | . | . | Y | . | . | Y | . | Y | Y | . | . |
+| GPT-OSS | . | Y | . | . | . | Y | . | . | . | . | . |
+| LLaMA | Y | . | Y | . | . | Y | . | . | Y | . | Y |
+| LLaMA-v2 | Y | . | Y | . | . | Y | Y | Y | Y | . | Y |
+| LLaMA 3 | . | . | . | . | Y | Y | Y | Y | . | . | . |
+| LLaMA 4 | Y | . | Y | . | . | Y | . | . | . | . | . |
+| Mistral | . | . | Y | . | . | Y | . | . | Y | . | . |
+| Mixtral | Y | . | Y | . | . | Y | . | . | . | . | . |
+| Phi | . | . | . | . | . | Y | . | Y | . | . | . |
+| Qwen | . | . | . | . | . | Y | . | Y | Y | . | Y |
+| Qwen-2/2.5 | Y | . | Y | . | . | Y | . | Y | Y | . | Y |
+| Qwen-3 | Y | . | Y | . | . | Y | Y | . | Y | . | Y |
+| BLIP2-OPT | . | . | . | . | . | Y | . | . | . | . | . |
+| BLIP2-T5 | . | . | . | . | . | Y | . | . | . | . | . |
+| LLaVA | . | . | Y | . | . | Y | . | . | Y | . | Y |
+| VILA | . | . | Y | . | . | Y | . | . | Y | . | Y |
+| Nougat | . | . | . | . | . | Y | . | . | . | . | . |
```{note}
@@ -93,13 +120,13 @@ The language component decides which quantization methods are supported by a giv
## Hardware Support Matrix
-| Model | NVFP4 | MXFP4 | FP8(per tensor)| FP8(block scaling) | FP8(rowwise) | FP8 KV Cache |W4A8 AWQ | W4A16 AWQ | W4A8 GPTQ | W4A16 GPTQ |
-| :------------- | :---: | :---: | :---: | :---: | :---: | :---: | :-------: | :-------: | :--------: | :--------: |
-| Blackwell(sm120) | Y | Y | Y | . | . | Y | . | . | . | . |
-| Blackwell(sm100) | Y | Y | Y | Y | . | Y | . | . | . | . |
-| Hopper | . | . | Y | Y | Y | Y | Y | Y | Y | Y |
-| Ada Lovelace | . | . | Y | . | . | Y | Y | Y | Y | Y |
-| Ampere | . | . | . | . | . | Y | . | Y | . | Y |
+| Hardware | NVFP4 | MXFP4 | FP8(per tensor)| FP8(block scaling) | FP8(rowwise) | FP8 KV Cache | NVFP4 KV Cache | W4A8 AWQ | W4A16 AWQ | W4A8 GPTQ | W4A16 GPTQ |
+| :------------- | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :-------: | :-------: | :--------: | :--------: |
+| Blackwell(sm120) | Y | Y | Y | . | . | Y | . | . | . | . | . |
+| Blackwell(sm100) | Y | Y | Y | Y | . | Y | Y | . | . | . | . |
+| Hopper | . | . | Y | Y | Y | Y | . | Y | Y | Y | Y |
+| Ada Lovelace | . | . | Y | . | . | Y | . | Y | Y | Y | Y |
+| Ampere | . | . | . | . | . | Y | . | . | Y | . | Y |
```{note}
FP8 block-wise scaling GEMM kernels for sm100 use the MXFP8 recipe (E4M3 act/weight and UE8M0 act/weight scale), which differs slightly from the SM90 FP8 recipe (E4M3 act/weight and FP32 act/weight scale).
```
@@ -108,4 +135,4 @@ FP8 block wise scaling GEMM kernels for sm100 are using MXFP8 recipe (E4M3 act/w
## Quick Links
- [Pre-quantized Models by ModelOpt](https://huggingface.co/collections/nvidia/model-optimizer-66aa84f7966b3150262481a4)
-- [ModelOpt Support Matrix](https://nvidia.github.io/TensorRT-Model-Optimizer/guides/0_support_matrix.html)
+- [ModelOpt Support Matrix](https://nvidia.github.io/Model-Optimizer/guides/0_support_matrix.html)
diff --git a/latest/_sources/legacy/performance/perf-analysis.md.txt b/latest/_sources/legacy/performance/perf-analysis.md.txt
index f72437f4e9..51abd6460d 100644
--- a/latest/_sources/legacy/performance/perf-analysis.md.txt
+++ b/latest/_sources/legacy/performance/perf-analysis.md.txt
@@ -66,10 +66,10 @@ Say we want to profile iterations 100 to 150 on a trtllm-bench/trtllm-serve run,
#!/bin/bash
# Prepare dataset for the benchmark
-python3 benchmarks/cpp/prepare_dataset.py \
- --tokenizer=${MODEL_PATH} \
- --stdout token-norm-dist --num-requests=${NUM_SAMPLES} \
- --input-mean=1000 --output-mean=1000 --input-stdev=0 --output-stdev=0 > /tmp/dataset.txt
+trtllm-bench \
+ --model=${MODEL_PATH} prepare-dataset \
+ --output /tmp/dataset.txt token-norm-dist --num-requests=${NUM_SAMPLES} \
+ --input-mean=1000 --output-mean=1000 --input-stdev=0 --output-stdev=0
# Benchmark and profile
TLLM_PROFILE_START_STOP=100-150 nsys profile \
diff --git a/latest/_sources/legacy/performance/perf-benchmarking.md.txt b/latest/_sources/legacy/performance/perf-benchmarking.md.txt
index 55caef07ba..9530b6da1b 100644
--- a/latest/_sources/legacy/performance/perf-benchmarking.md.txt
+++ b/latest/_sources/legacy/performance/perf-benchmarking.md.txt
@@ -110,7 +110,7 @@ of 128:128.
To run the benchmark from start to finish, run the following commands:
```shell
-python benchmarks/cpp/prepare_dataset.py --stdout --tokenizer meta-llama/Llama-3.1-8B token-norm-dist --input-mean 128 --output-mean 128 --input-stdev 0 --output-stdev 0 --num-requests 3000 > /tmp/synthetic_128_128.txt
+trtllm-bench --tokenizer meta-llama/Llama-3.1-8B prepare-dataset --output /tmp/synthetic_128_128.txt token-norm-dist --input-mean 128 --output-mean 128 --input-stdev 0 --output-stdev 0 --num-requests 3000
trtllm-bench --model meta-llama/Llama-3.1-8B build --dataset /tmp/synthetic_128_128.txt --quantization FP8
trtllm-bench --model meta-llama/Llama-3.1-8B throughput --dataset /tmp/synthetic_128_128.txt --engine_dir /tmp/meta-llama/Llama-3.1-8B/tp_1_pp_1
```
@@ -207,7 +207,7 @@ directory. For example, to generate a synthetic dataset of 1000 requests with a
128/128 for [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B), run:
```shell
-benchmarks/cpp/prepare_dataset.py --stdout --tokenizer meta-llama/Llama-3.1-8B token-norm-dist --input-mean 128 --output-mean 128 --input-stdev 0 --output-stdev 0 --num-requests 1000 > /tmp/synthetic_128_128.txt
+trtllm-bench --tokenizer meta-llama/Llama-3.1-8B prepare-dataset --output /tmp/synthetic_128_128.txt token-norm-dist --input-mean 128 --output-mean 128 --input-stdev 0 --output-stdev 0 --num-requests 1000
```
### Building a Benchmark Engine
@@ -662,7 +662,7 @@ checkpoint. For the Llama-3.1 models, TensorRT-LLM provides the following checkp
- [`nvidia/Llama-3.1-405B-Instruct-FP8`](https://huggingface.co/nvidia/Llama-3.1-405B-Instruct-FP8)
`trtllm-bench` utilizes the `hf_quant_config.json` file present in the pre-quantized checkpoints above. The configuration
-file is present in checkpoints quantized with [TensorRT Model Optimizer](https://github.com/NVIDIA/TensorRT-Model-Optimizer)
+file is present in checkpoints quantized with [Model Optimizer](https://github.com/NVIDIA/Model-Optimizer)
and describes the compute and KV cache quantization that checkpoint was compiled with. For example, from the checkpoints
above:
diff --git a/latest/_sources/legacy/reference/support-matrix.md.txt b/latest/_sources/legacy/reference/support-matrix.md.txt
index 1dc59fcfa0..24a3a01512 100644
--- a/latest/_sources/legacy/reference/support-matrix.md.txt
+++ b/latest/_sources/legacy/reference/support-matrix.md.txt
@@ -133,6 +133,7 @@ In addition, older architectures can have limitations for newer software release
* - GPU Model Architectures
-
- [NVIDIA GB200 NVL72](https://www.nvidia.com/en-us/data-center/gb200-nvl72/)
+ - [NVIDIA GB300 NVL72](https://www.nvidia.com/en-us/data-center/gb300-nvl72/)
- [NVIDIA Blackwell Architecture](https://www.nvidia.com/en-us/data-center/technologies/blackwell-architecture/)
- [NVIDIA Grace Hopper Superchip](https://www.nvidia.com/en-us/data-center/grace-hopper-superchip/)
- [NVIDIA Hopper Architecture](https://www.nvidia.com/en-us/data-center/technologies/hopper-architecture/)
diff --git a/latest/_sources/llm-api/reference.rst.txt b/latest/_sources/llm-api/reference.rst.txt
index 76a2c9f0e2..8816f4ccc3 100644
--- a/latest/_sources/llm-api/reference.rst.txt
+++ b/latest/_sources/llm-api/reference.rst.txt
@@ -17,6 +17,14 @@ API Reference
:member-order: groupwise
:inherited-members:
+.. autoclass:: tensorrt_llm.llmapi.AsyncLLM
+ :members:
+ :undoc-members:
+ :show-inheritance:
+ :special-members: __init__
+ :member-order: groupwise
+ :inherited-members:
+
.. autoclass:: tensorrt_llm.llmapi.MultimodalEncoder
:members:
:undoc-members:
@@ -288,7 +296,7 @@ API Reference
:special-members: __init__
:member-order: groupwise
:inherited-members:
- :exclude-members: model_parametrized_name,update_forward_refs,model_rebuild,parse_raw,from_orm,model_validate_strings,model_computed_fields,validate,model_post_init,model_copy,dict,schema,parse_obj,json,model_validate_json,copy,model_config,model_dump_json,model_fields,schema_json,construct,model_extra,model_json_schema,model_validate,model_dump,parse_file,model_fields_set,model_construct
+ :exclude-members: model_rebuild,model_fields_set,parse_obj,model_post_init,model_fields,validate,from_orm,update_forward_refs,model_dump_json,model_dump,parse_file,model_json_schema,model_parametrized_name,json,model_validate,model_config,model_copy,model_construct,parse_raw,model_validate_json,dict,construct,schema,copy,model_validate_strings,model_computed_fields,model_extra,schema_json
.. autoclass:: tensorrt_llm.llmapi.TrtLlmArgs
:members:
@@ -297,7 +305,7 @@ API Reference
:special-members: __init__
:member-order: groupwise
:inherited-members:
- :exclude-members: model_parametrized_name,update_forward_refs,model_rebuild,parse_raw,from_orm,model_validate_strings,model_computed_fields,validate,model_post_init,model_copy,dict,schema,parse_obj,json,model_validate_json,copy,model_config,model_dump_json,model_fields,schema_json,construct,model_extra,model_json_schema,model_validate,model_dump,parse_file,model_fields_set,model_construct
+ :exclude-members: model_rebuild,model_fields_set,parse_obj,model_post_init,model_fields,validate,from_orm,update_forward_refs,model_dump_json,model_dump,parse_file,model_json_schema,model_parametrized_name,json,model_validate,model_config,model_copy,model_construct,parse_raw,model_validate_json,dict,construct,schema,copy,model_validate_strings,model_computed_fields,model_extra,schema_json
.. autoclass:: tensorrt_llm.llmapi.AutoDecodingConfig
:members:
diff --git a/latest/_sources/models/supported-models.md.txt b/latest/_sources/models/supported-models.md.txt
index c6b6194b5d..40f3840073 100644
--- a/latest/_sources/models/supported-models.md.txt
+++ b/latest/_sources/models/supported-models.md.txt
@@ -8,6 +8,7 @@ The following is a table of supported models for the PyTorch backend:
| `BertForSequenceClassification` | BERT-based | `textattack/bert-base-uncased-yelp-polarity` |
| `DeciLMForCausalLM` | Nemotron | `nvidia/Llama-3_1-Nemotron-51B-Instruct` |
| `DeepseekV3ForCausalLM` | DeepSeek-V3 | `deepseek-ai/DeepSeek-V3` |
+| `DeepseekV32ForCausalLM` | DeepSeek-V3.2 | `deepseek-ai/DeepSeek-V3.2` |
| `Exaone4ForCausalLM` | EXAONE 4.0 | `LGAI-EXAONE/EXAONE-4.0-32B` |
| `Gemma3ForCausalLM` | Gemma 3 | `google/gemma-3-1b-it` |
| `GptOssForCausalLM` | GPT-OSS | `openai/gpt-oss-120b` |
@@ -34,6 +35,7 @@ Note: Support for other models may vary. Features marked "N/A" are not applicabl
| Model Architecture/Feature | Overlap Scheduler | CUDA Graph | Attention Data Parallelism | Disaggregated Serving | Chunked Prefill | MTP | EAGLE-3(One Model Engine) | EAGLE-3(Two Model Engine) | Torch Sampler | TLLM C++ Sampler | KV Cache Reuse | Sliding Window Attention | Logits Post Processor | Guided Decoding |
| ------------------------------ | ----------------- | ---------- | -------------------------- | --------------------- | --------------- | --- | ------------------------- | ------------------------- | ------------- | ---------------- | -------------- | ------------------------ | --------------------- | --------------- |
| `DeepseekV3ForCausalLM` | Yes | Yes | Yes | Yes | Yes [^1] | Yes | No | No | Yes | Yes | Yes [^2] | N/A | Yes | Yes |
+| `DeepseekV32ForCausalLM` | Yes | Yes | Yes | Yes | Yes | Yes | No | No | Yes | Yes | Yes | N/A | Yes | Yes |
| `Qwen3MoeForCausalLM` | Yes | Yes | Yes | Yes | Yes | No | Yes | Yes | Yes | Yes | Yes | N/A | Yes | Yes |
| `Qwen3NextForCausalLM` | Yes | Yes | No | Untested | Yes | No | No | No | Yes | Yes | No | No | Untested | Untested |
| `Llama4ForConditionalGeneration` | Yes | Yes | Yes | Yes | Yes | No | Yes | Yes | Yes | Yes | Untested | N/A | Yes | Yes |
diff --git a/latest/_sources/overview.md.txt b/latest/_sources/overview.md.txt
index 0df4f72539..471e57ff23 100644
--- a/latest/_sources/overview.md.txt
+++ b/latest/_sources/overview.md.txt
@@ -4,7 +4,7 @@
## About TensorRT LLM
-[TensorRT LLM](https://developer.nvidia.com/tensorrt) is NVIDIA's comprehensive open-source library for accelerating and optimizing inference performance of the latest large language models (LLMs) on NVIDIA GPUs.
+[TensorRT LLM](https://developer.nvidia.com/tensorrt) is NVIDIA's comprehensive open-source library for accelerating and optimizing inference performance of the latest large language models (LLMs) on NVIDIA GPUs.
## Key Capabilities
@@ -40,7 +40,7 @@ TensorRT LLM strives to support the most popular models on **Day 0**.
### 🚀 **Advanced Optimization & Production Features**
- **[In-Flight Batching & Paged Attention](./features/paged-attention-ifb-scheduler.md)**: In-flight batching eliminates wait times by dynamically managing request execution, processing context and generation phases together for maximum GPU utilization and reduced latency.
- **[Multi-GPU Multi-Node Inference](./features/parallel-strategy.md)**: Seamless distributed inference with tensor, pipeline, and expert parallelism across multiple GPUs and nodes through the Model Definition API.
-- **[Advanced Quantization](./features/quantization.md)**:
+- **[Advanced Quantization](./features/quantization.md)**:
- **FP4 Quantization**: Native support on NVIDIA B200 GPUs with optimized FP4 kernels
- **FP8 Quantization**: Automatic conversion on NVIDIA H100 GPUs leveraging Hopper architecture
- **[Speculative Decoding](./features/speculative-decoding.md)**: Multiple algorithms including EAGLE, MTP and NGram
@@ -54,7 +54,7 @@ TensorRT LLM strives to support the most popular models on **Day 0**.
### 🔧 **Latest GPU Architecture Support**
TensorRT LLM supports the full spectrum of NVIDIA GPU architectures:
-- **NVIDIA Blackwell**: B200, GB200, RTX Pro 6000 SE with FP4 optimization
+- **NVIDIA Blackwell**: B200, GB200, B300, GB300, and RTX Pro 6000 SE with FP4 optimization
- **NVIDIA Hopper**: H100, H200, GH200 with FP8 acceleration
- **NVIDIA Ada Lovelace**: L40/L40S, RTX 40 series with FP8 acceleration
- **NVIDIA Ampere**: A100, RTX 30 series for production workloads
diff --git a/latest/_sources/quick-start-guide.md.txt b/latest/_sources/quick-start-guide.md.txt
index 088f70b3ea..6eff451feb 100644
--- a/latest/_sources/quick-start-guide.md.txt
+++ b/latest/_sources/quick-start-guide.md.txt
@@ -10,7 +10,7 @@ This is the starting point to try out TensorRT LLM. Specifically, this Quick Sta
The [TensorRT LLM container](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/tensorrt-llm/containers/release/tags) maintained by NVIDIA contains all of the required dependencies pre-installed. You can start the container on a machine with NVIDIA GPUs via:
```bash
-docker run --rm -it --ipc host --gpus all --ulimit memlock=-1 --ulimit stack=67108864 -p 8000:8000 nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc5
+docker run --rm -it --ipc host --gpus all --ulimit memlock=-1 --ulimit stack=67108864 -p 8000:8000 nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc6
```
diff --git a/latest/_sources/torch/auto_deploy/advanced/expert_configurations.md.txt b/latest/_sources/torch/auto_deploy/advanced/expert_configurations.md.txt
index 4df92f0cf7..cf4c2c94dd 100644
--- a/latest/_sources/torch/auto_deploy/advanced/expert_configurations.md.txt
+++ b/latest/_sources/torch/auto_deploy/advanced/expert_configurations.md.txt
@@ -190,6 +190,25 @@ Specifies which sharding dimensions to apply during heuristic sharding. The avai
You can enable multiple dimensions simultaneously. For example, `['tp', 'ep']` will apply both tensor parallelism and expert parallelism.
+#### `process_grid` (dict, default: `None`)
+
+Specifies a 2D device mesh for hybrid EP+TP parallelism.
+
+- NOTE 1: This grid applies only to the MoE layers. Attention, Mamba, and MLP layers are unaffected.
+- NOTE 2: The order of the keys matters. Process grid's layout is in the generalized column-major order,
+ that is, the last dimension is stride-one.
+- NOTE 3: `ep * tp` must be equal to the provided world size. Otherwise, the mesh will be considered invalid,
+ and 1D ep-only parallelism will be applied.
+
+Example:
+
+```
+ process_grid: {'ep': 2, 'tp': 2}
+```
+
+If `world_size == 4`, ranks \[0,1\] and \[2,3\] will create two EP groups. Experts will be distributed across these two
+groups, and internally, TP=2 column-row sharding will be applied.
+
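To make the layout concrete, here is a small illustrative Python sketch (not AutoDeploy code; it only reproduces the rank arithmetic described above, assuming the last key, `tp`, is the stride-one dimension):

```python
# Illustrative only: rank layout implied by process_grid = {'ep': 2, 'tp': 2}.
ep_size, tp_size = 2, 2
world_size = ep_size * tp_size  # ep * tp must equal the provided world size

# rank = ep_index * tp_size + tp_index, so consecutive ranks land in the same
# group; each group owns one shard of the experts and applies TP=2
# column-row sharding internally.
groups = [[e * tp_size + t for t in range(tp_size)] for e in range(ep_size)]
print(groups)  # [[0, 1], [2, 3]]
```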
#### `requires_shape_prop` (bool, default: `true`)
Whether shape propagation is required before applying this transform. Shape propagation enables the transform to make informed decisions about sharding strategies based on tensor dimensions.
diff --git a/latest/_sources/torch/auto_deploy/support_matrix.md.txt b/latest/_sources/torch/auto_deploy/support_matrix.md.txt
index c8780cbca1..f0158253dd 100644
--- a/latest/_sources/torch/auto_deploy/support_matrix.md.txt
+++ b/latest/_sources/torch/auto_deploy/support_matrix.md.txt
@@ -118,7 +118,7 @@ Optimize attention operations with different attention kernel implementations:
### Precision Support
-AutoDeploy supports models with various precision formats, including quantized checkpoints generated by [`TensorRT-Model-Optimizer`](https://github.com/NVIDIA/TensorRT-Model-Optimizer).
+AutoDeploy supports models with various precision formats, including quantized checkpoints generated by [`Model-Optimizer`](https://github.com/NVIDIA/Model-Optimizer).
**Supported precision types include:**
diff --git a/latest/_sources/torch/features/quantization.md.txt b/latest/_sources/torch/features/quantization.md.txt
index a2b6c48be2..47cc745165 100644
--- a/latest/_sources/torch/features/quantization.md.txt
+++ b/latest/_sources/torch/features/quantization.md.txt
@@ -1,7 +1,7 @@
# Quantization
The PyTorch backend supports FP8 and NVFP4 quantization. You can pass quantized models in HF model hub,
-which are generated by [TensorRT Model Optimizer](https://github.com/NVIDIA/TensorRT-Model-Optimizer).
+which are generated by [Model Optimizer](https://github.com/NVIDIA/Model-Optimizer).
```python
from tensorrt_llm._torch import LLM
@@ -12,7 +12,7 @@ llm.generate("Hello, my name is")
Or you can try the following commands to get a quantized model by yourself:
```bash
-git clone https://github.com/NVIDIA/TensorRT-Model-Optimizer.git
-cd TensorRT-Model-Optimizer/examples/llm_ptq
+git clone https://github.com/NVIDIA/Model-Optimizer.git
+cd Model-Optimizer/examples/llm_ptq
scripts/huggingface_example.sh --model --quant fp8 --export_fmt hf
```
diff --git a/latest/blogs/Best_perf_practice_on_DeepSeek-R1_in_TensorRT-LLM.html b/latest/blogs/Best_perf_practice_on_DeepSeek-R1_in_TensorRT-LLM.html
index 25eacbb65f..0519f15432 100644
--- a/latest/blogs/Best_perf_practice_on_DeepSeek-R1_in_TensorRT-LLM.html
+++ b/latest/blogs/Best_perf_practice_on_DeepSeek-R1_in_TensorRT-LLM.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -360,6 +360,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -368,6 +369,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+OpenAI Responses Client
Dynamo K8s Example
@@ -573,7 +575,7 @@
Exploring more ISL/OSL combinations
@@ -612,8 +614,11 @@ For NVIDIA Hopper GPUs, it’s recommended to use the FP8 version of the DeepSee
YOUR_MODEL_PATH=<YOUR_MODEL_PATH>
cd $YOUR_MODEL_PATH
-## Download FP4 model for Blackwell GPUs
-git clone https://huggingface.co/nvidia/DeepSeek-R1-FP4
+## Download NVFP4 model for Blackwell GPUs
+git clone https://huggingface.co/nvidia/DeepSeek-R1-NVFP4-v2
+
+## Or the 0528 version
+git clone https://huggingface.co/nvidia/DeepSeek-R1-0528-NVFP4-v2
## Download FP8 model for Hopper GPUs
## FP8 model also works for Blackwell, but FP4 has the best performance on Blackwell.
@@ -784,13 +789,13 @@ trtllm-bench --model nvidia/DeepS
Benchmark
To do the benchmark, run the following command:
# generate synthetic dataset
-python ${YOUR_WORK_PATH}/benchmarks/cpp/prepare_dataset.py \
- --stdout \
- --tokenizer nvidia/DeepSeek-R1-FP4 \
+trtllm-bench --model nvidia/DeepSeek-R1-FP4 \
+ prepare-dataset \
+ --output dataset.txt \
token-norm-dist \
--input-mean 1024 --output-mean 2048 \
--input-stdev 0 --output-stdev 0 \
- --num-requests 49152 > dataset.txt
+ --num-requests 49152
YOUR_DATA_PATH=./dataset.txt
@@ -888,13 +893,14 @@ trtllm-bench --model deepseek-ai/D
Our benchmark results are based on Batch = 1024, ISL = 1K, OSL = 2K, and num_requests = 5120 from a real dataset
To do the benchmark, run the following command:
# generate synthetic dataset
-python ${YOUR_WORK_PATH}/benchmarks/cpp/prepare_dataset.py \
- --stdout \
- --tokenizer deepseek-ai/DeepSeek-R1 \
+trtllm-bench --model nvidia/DeepSeek-R1-FP4 \
+ prepare-dataset \
+ --output dataset.txt \
token-norm-dist \
--input-mean 1024 --output-mean 2048 \
--input-stdev 0 --output-stdev 0 \
- --num-requests 5120 > dataset.txt
+ --num-requests 5120
+
YOUR_DATA_PATH=./dataset.txt
cat >./extra-llm-api-config.yml<<EOF
@@ -941,10 +947,10 @@ trtllm-bench -m deepseek-ai/DeepSe
Exploring more ISL/OSL combinations
-To benchmark TensorRT LLM on DeepSeek models with more ISL/OSL combinations, you can use prepare_dataset.py to generate the dataset and use similar commands mentioned in the previous section. TensorRT LLM is working on enhancements that can make the benchmark process smoother.
+To benchmark TensorRT LLM on DeepSeek models with more ISL/OSL combinations, you can use the trtllm-bench prepare-dataset subcommand to generate the dataset and reuse the commands shown in the previous section. TensorRT LLM is working on enhancements to make the benchmarking process smoother.
WIP: Enable more features by default
-Currently, there are some features that need to be enabled through a user-defined file extra-llm-api-config.yml, such as CUDA graph, overlap scheduler and attention dp. We’re working on to enable those features by default, so that users can get good out-of-the-box performance on DeepSeek models.
+Currently, some features need to be enabled through a user-defined extra-llm-api-config.yml file, such as attention dp. We're working on enabling those features by default so that users get good out-of-the-box performance on DeepSeek models.
Note that max_batch_size and max_num_tokens can easily affect performance. Their default values are carefully chosen and should deliver good performance in most cases, but you may still need to tune them for peak performance.
Generally, make sure that max_batch_size is not so low that it bottlenecks throughput, and that max_num_tokens is large enough to cover the maximum input sequence length of the samples in the dataset, as described in the section “WIP: Chunked context support on DeepSeek models” below.
For more details on max_batch_size and max_num_tokens, refer to Tuning Max Batch Size and Max Num Tokens.
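As an illustration only (a sketch, not the configuration used for the published numbers; the option names enable_attention_dp, max_batch_size, and max_num_tokens are assumed to match the current LLM API and should be verified against the API reference), the same knobs can also be set programmatically:

```python
from tensorrt_llm import LLM

# Sketch only: the keyword arguments below are assumptions, not taken from
# this blog; check the LLM API reference before relying on them.
llm = LLM(
    model="nvidia/DeepSeek-R1-FP4",
    tensor_parallel_size=8,
    enable_attention_dp=True,   # the "attention dp" feature mentioned above
    max_batch_size=1024,        # high enough not to bottleneck throughput
    max_num_tokens=4096,        # must cover the longest input in the dataset
)
```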
@@ -1142,9 +1148,9 @@ trtllm-bench -m deepseek-ai/DeepSe
diff --git a/latest/blogs/Falcon180B-H200.html b/latest/blogs/Falcon180B-H200.html
index b6d8714df6..5b2bd0fb2e 100644
--- a/latest/blogs/Falcon180B-H200.html
+++ b/latest/blogs/Falcon180B-H200.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -358,6 +358,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -366,6 +367,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+OpenAI Responses Client
Dynamo K8s Example
@@ -792,9 +794,9 @@ ISL = Input Sequence Length
diff --git a/latest/blogs/H100vsA100.html b/latest/blogs/H100vsA100.html
index df0126c3d4..5c36b72892 100644
--- a/latest/blogs/H100vsA100.html
+++ b/latest/blogs/H100vsA100.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -360,6 +360,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -368,6 +369,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+OpenAI Responses Client
Dynamo K8s Example
@@ -764,9 +766,9 @@
diff --git a/latest/blogs/H200launch.html b/latest/blogs/H200launch.html
index 141a3c5120..c3f3ac0b4b 100644
--- a/latest/blogs/H200launch.html
+++ b/latest/blogs/H200launch.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -360,6 +360,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -368,6 +369,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+OpenAI Responses Client
Dynamo K8s Example
@@ -756,9 +758,9 @@ TensorRT LLM v0.5.0, TensorRT v9.1.0.4 | H200, H100 FP8.
diff --git a/latest/blogs/XQA-kernel.html b/latest/blogs/XQA-kernel.html
index 499db3c0cc..4c14c8754e 100644
--- a/latest/blogs/XQA-kernel.html
+++ b/latest/blogs/XQA-kernel.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -360,6 +360,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -368,6 +369,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+OpenAI Responses Client
Dynamo K8s Example
@@ -723,9 +725,9 @@ ISL = Input Sequence Length
diff --git a/latest/blogs/quantization-in-TRT-LLM.html b/latest/blogs/quantization-in-TRT-LLM.html
index d847c48431..5e2b5349a5 100644
--- a/latest/blogs/quantization-in-TRT-LLM.html
+++ b/latest/blogs/quantization-in-TRT-LLM.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -358,6 +358,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -366,6 +367,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+OpenAI Responses Client
Dynamo K8s Example
@@ -858,9 +860,9 @@
diff --git a/latest/blogs/tech_blog/blog10_ADP_Balance_Strategy.html b/latest/blogs/tech_blog/blog10_ADP_Balance_Strategy.html
index 1490d95381..45a685a126 100644
--- a/latest/blogs/tech_blog/blog10_ADP_Balance_Strategy.html
+++ b/latest/blogs/tech_blog/blog10_ADP_Balance_Strategy.html
@@ -63,7 +63,7 @@
@@ -78,7 +78,7 @@
-
+
@@ -362,6 +362,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -370,6 +371,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+OpenAI Responses Client
Dynamo K8s Example
@@ -1105,9 +1107,9 @@ The Pareto frontier analysis provides critical insights for real-world deploymen
diff --git a/latest/blogs/tech_blog/blog11_GPT_OSS_Eagle3.html b/latest/blogs/tech_blog/blog11_GPT_OSS_Eagle3.html
index 2255aff912..7ad6cd1172 100644
--- a/latest/blogs/tech_blog/blog11_GPT_OSS_Eagle3.html
+++ b/latest/blogs/tech_blog/blog11_GPT_OSS_Eagle3.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -360,6 +360,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -368,6 +369,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+OpenAI Responses Client
Dynamo K8s Example
@@ -798,9 +800,9 @@ cat > /config/models/eagle/eagl
diff --git a/latest/blogs/tech_blog/blog12_Combining_Guided_Decoding_and_Speculative_Decoding.html b/latest/blogs/tech_blog/blog12_Combining_Guided_Decoding_and_Speculative_Decoding.html
index 765c4f42f7..5f7b666b8d 100644
--- a/latest/blogs/tech_blog/blog12_Combining_Guided_Decoding_and_Speculative_Decoding.html
+++ b/latest/blogs/tech_blog/blog12_Combining_Guided_Decoding_and_Speculative_Decoding.html
@@ -63,7 +63,7 @@
@@ -78,7 +78,7 @@
-
+
@@ -362,6 +362,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -370,6 +371,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+OpenAI Responses Client
Dynamo K8s Example
@@ -1019,9 +1021,9 @@
diff --git a/latest/blogs/tech_blog/blog13_Inference_Time_Compute_Implementation_in_TensorRT-LLM.html b/latest/blogs/tech_blog/blog13_Inference_Time_Compute_Implementation_in_TensorRT-LLM.html
index 6b8f5abc6f..7be3ab8af2 100644
--- a/latest/blogs/tech_blog/blog13_Inference_Time_Compute_Implementation_in_TensorRT-LLM.html
+++ b/latest/blogs/tech_blog/blog13_Inference_Time_Compute_Implementation_in_TensorRT-LLM.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -360,6 +360,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -368,6 +369,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+OpenAI Responses Client
Dynamo K8s Example
@@ -1086,9 +1088,9 @@ is a certainty-based, training-free approach to accelerate Chain-of-Thought (CoT
diff --git a/latest/blogs/tech_blog/blog14_Scaling_Expert_Parallelism_in_TensorRT-LLM_part3.html b/latest/blogs/tech_blog/blog14_Scaling_Expert_Parallelism_in_TensorRT-LLM_part3.html
index 1e37727c86..f353f696d2 100644
--- a/latest/blogs/tech_blog/blog14_Scaling_Expert_Parallelism_in_TensorRT-LLM_part3.html
+++ b/latest/blogs/tech_blog/blog14_Scaling_Expert_Parallelism_in_TensorRT-LLM_part3.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -360,6 +360,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -368,6 +369,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+OpenAI Responses Client
Dynamo K8s Example
@@ -578,7 +580,7 @@
wo GEMM FP4 quantization
The wo GEMM is the final linear layer within the multi-head attention block that produces the final outputs. While DeepSeek R1’s MLA modifies the initial projections for keys and values, the wo GEMM operator remains a critical and standard component for finalizing the attention computation. In the term, “wo” is the abbreviation for the weight matrix for the output.
-We’ve evaluated that quantizing the wo GEMM to FP4 still satisfies the accuracy requirements, maintaining a similar MTP accept rate (AR) while improving end-to-end performance. The NVIDIA TensorRT Model Optimizer team has published checkpoints that additionally quantize the wo module in attention layers to FP4 on HuggingFace:
+We’ve evaluated that quantizing the wo GEMM to FP4 still satisfies the accuracy requirements, maintaining a similar MTP accept rate (AR) while improving end-to-end performance. The NVIDIA Model Optimizer team has published checkpoints that additionally quantize the wo module in attention layers to FP4 on HuggingFace:
Dynamo K8s Example
@@ -633,7 +635,7 @@
*TensorRT LLM already supports FP8 Attention while for this latency scenario low-precision attention computation doesn’t help with performance so we choose to use bf16 precision for the Attention Modules.
-
** nvfp4 model checkpoint is generated by the NVIDIA TensorRT Model Optimizer toolkit.
+
** nvfp4 model checkpoint is generated by the NVIDIA Model Optimizer toolkit.
*** RouterGEMM uses bf16 inputs/weights with fp32 outputs for numerical stability
@@ -1199,9 +1201,9 @@
diff --git a/latest/blogs/tech_blog/blog2_DeepSeek_R1_MTP_Implementation_and_Optimization.html b/latest/blogs/tech_blog/blog2_DeepSeek_R1_MTP_Implementation_and_Optimization.html
index 4af90cf90b..7a89ed19f3 100644
--- a/latest/blogs/tech_blog/blog2_DeepSeek_R1_MTP_Implementation_and_Optimization.html
+++ b/latest/blogs/tech_blog/blog2_DeepSeek_R1_MTP_Implementation_and_Optimization.html
@@ -63,7 +63,7 @@
@@ -78,7 +78,7 @@
-
+
@@ -362,6 +362,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -370,6 +371,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+OpenAI Responses Client
Dynamo K8s Example
@@ -944,9 +946,9 @@ trtllm-bench --model nvidia/DeepSe
diff --git a/latest/blogs/tech_blog/blog3_Optimizing_DeepSeek_R1_Throughput_on_NVIDIA_Blackwell_GPUs.html b/latest/blogs/tech_blog/blog3_Optimizing_DeepSeek_R1_Throughput_on_NVIDIA_Blackwell_GPUs.html
index ce6fab6341..ccf5ca3729 100644
--- a/latest/blogs/tech_blog/blog3_Optimizing_DeepSeek_R1_Throughput_on_NVIDIA_Blackwell_GPUs.html
+++ b/latest/blogs/tech_blog/blog3_Optimizing_DeepSeek_R1_Throughput_on_NVIDIA_Blackwell_GPUs.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -360,6 +360,7 @@
Curl Chat Client
Curl Chat Client For Multimodal
Curl Completion Client
+Curl Responses Client
Deepseek R1 Reasoning Parser
Genai Perf Client
Genai Perf Client For Multimodal
@@ -368,6 +369,7 @@
OpenAI Completion Client
Openai Completion Client For Lora
OpenAI Completion Client with JSON Schema
+OpenAI Responses Client
Dynamo K8s Example
@@ -559,7 +561,7 @@
FP8 KV cache and FP8 attention, rather than BF16 precision.
FP4 Allgather for better communication bandwidth utilization.
-The checkpoint used in this blog is hosted in nvidia/DeepSeek-R1-FP4, generated by NVIDIA Model Optimizer. The accuracy score of common dataset on this FP4 checkpoint and TensorRT LLM implementations are:
+The checkpoint used in this blog is hosted in nvidia/DeepSeek-R1-FP4, generated by NVIDIA Model Optimizer. The accuracy score of common dataset on this FP4 checkpoint and TensorRT LLM implementations are: